This file documents all data processing and analysis steps for "Using consumption and reward simulations to increase the appeal of plant-based foods". The R project uses a private library to make all steps computationally reproducible; I use the renv package for this. That means you will need to install the renv package and run the `renv::restore()` command, see instructions here. This works best if you don't have the library and staging folders within the renv folder.

# pacman makes it easier to load and install packages
if (!requireNamespace("pacman"))
  install.packages("pacman")

library(pacman)

# load packages
# NOTE(review): Rmisc attaches plyr, which can mask dplyr verbs (summarize,
# mutate, ...) depending on its position relative to tidyverse in this list --
# confirm the load order below is intentional.
p_load(
  Rmisc,
  here,
  tidyverse,
  cowplot,
  DT,
  pastecs,
  knitr,
  lme4,
  afex,
  emmeans,
  influence.ME,
  MuMIn,
  BayesFactor,
  effects
)

# set seed so any stochastic steps below are reproducible
set.seed(42)

# set default theme for all ggplot output
theme_set(theme_cowplot())
### FUNCTION 1 ###
# Convert a height given in feet and inches to centimetres, rounded to the
# nearest whole cm. Vectorized over both arguments.
feet_inches_to_cm <- function(feet, inches) {
  # 1 foot = 30.48 cm; 1 inch = 2.54 cm
  total_cm <- feet * 30.48 + inches * 2.54
  round(total_cm, digits = 0)
}

### FUNCTION 2 ###
# Convert a weight given in stones and pounds (or pounds only) to kilograms.
#
# pounds: weight in pounds (lbs)
# stones: optional weight in stones; when omitted or NULL, the input is
#         treated as pounds only
# Returns the weight in kg, rounded to the nearest whole kg.
#
# Fix: the original guarded with missing(stones) despite the `stones = NULL`
# default, so an explicit call like stones_pounds_to_kg(x, stones = NULL)
# fell into the else-branch and silently returned numeric(0)
# (NULL * 6.35029 is a zero-length vector). Checking is.null() handles both
# the omitted and the explicit-NULL case identically.
stones_pounds_to_kg <-
  function(pounds, stones = NULL){
    kg <- pounds * 0.453592
    if (!is.null(stones)) {
      kg <- kg + stones * 6.35029
    }
    round(kg, digits = 0)
  }

### FUNCTION 3 ###
# Summary statistics (pastecs::stat.desc) and a density plot for a variable.
# Trait-like variables (repeated_measure = FALSE) are first reduced to one
# row per participant; repeated measures use every row. Returns the plot, or
# a list of kable(summary) and the plot when want_summary is TRUE.
describe_visualize <-
  function(df,
           variable,
           repeated_measure = FALSE,
           want_summary = FALSE){

    variable <- enquo(variable)

    # trait-like variable: keep only the first row per participant
    if (repeated_measure == FALSE){
      df <-
        df %>%
        group_by(pp) %>%
        slice(1) %>%
        ungroup()
    }

    # descriptive statistics on the raw vector
    sum_stats <- stat.desc(pull(df, !! variable))

    # density plot with the individual observations shown along the x-axis
    density_plot <-
      ggplot(df, aes(x = !! variable)) +
        geom_density(color = "darkgrey", fill = "darkgrey") +
        geom_point(aes(x = !! variable, y = 0))

    if (want_summary == TRUE){
      list(kable(sum_stats), density_plot)
    } else {
      density_plot
    }
  }

### FUNCTION 4 ###
# Frequency table for a trait-like categorical variable: one row is kept per
# participant before tabulating.
my_table <-
  function(df, variable){
    variable <- enquo(variable)

    per_participant_values <-
      df %>%
      group_by(pp) %>%
      slice(1) %>%
      pull(!! variable)

    # dnn = "." reproduces the dimname that piping into table() would create
    table(per_participant_values, dnn = ".")
  }

### FUNCTION 5 ###
# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50

# null-coalescing helper: return `a` unless it is NULL, otherwise `b`
`%||%` <- function(a, b) {
  if (is.null(a)) b else a
}

# Layer constructor for a one-sided ("flat") violin, used by the raincloud
# plots below. Mirrors geom_violin()'s interface (same stat, trim and scale
# arguments) but draws via the GeomFlatViolin ggproto defined next; anything
# in ... is forwarded to the layer as extra params.
geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                             position = "dodge", trim = TRUE, scale = "area",
                             show.legend = NA, inherit.aes = TRUE, ...) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomFlatViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      ...
    )
  )
}

#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
# ggproto object implementing the flat (half) violin: like GeomViolin, but
# the outline is drawn on one side of x only, so xmin == x rather than being
# centred on it.
GeomFlatViolin <-
  ggproto("GeomFlatViolin", Geom,
    setup_data = function(data, params) {
      # width falls back from the data, to the params, to 90% of the x
      # resolution (%||% is the null-coalescing helper defined above)
      data$width <- data$width %||%
        params$width %||% (resolution(data$x, FALSE) * 0.9)

      # ymin, ymax, xmin, and xmax define the bounding rectangle for each group
      # NOTE(review): the grouped data frame is returned without ungroup();
      # ggplot2 appears to tolerate this, but confirm against the pinned
      # ggplot2 version.
      data %>%
        group_by(group) %>%
        mutate(
          ymin = min(y),
          ymax = max(y),
          xmin = x,
          xmax = x + width / 2
        )
    },

    draw_group = function(data, panel_scales, coord) {
      # Find the points for the line to go all the way around:
      # left edge stays at x, right edge bulges out by the violin width
      data <- transform(data,
        xminv = x,
        xmaxv = x + violinwidth * (xmax - x)
      )

      # Make sure it's sorted properly to draw the outline
      # (up the flat left side, then back down the curved right side)
      newdata <- rbind(
        plyr::arrange(transform(data, x = xminv), y),
        plyr::arrange(transform(data, x = xmaxv), -y)
      )

      # Close the polygon: set first and last point the same
      # Needed for coord_polar and such
      newdata <- rbind(newdata, newdata[1, ])

      # NOTE(review): ggplot2:::ggname is a private API -- may break on
      # ggplot2 upgrades outside the renv lockfile.
      ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
    },

    draw_key = draw_key_polygon,

    default_aes = aes(
      weight = 1, colour = "grey20", fill = "white", size = 0.5,
      alpha = NA, linetype = "solid"
    ),

    required_aes = c("x", "y")
  )

### FUNCTION 6 ###
# creates raincloud plots
#
# df:          long-format data with a `measure` column to filter on
# measurement: which level of `measure` to plot (e.g. "desire")
# variable:    name of the rating column, passed as a string (aes_string)
# group:       grouping column name (string); used for x, fill, and color
# xlab, title: axis label and plot title
# facet:       optional faceting spec passed to facet_wrap()
#
# NOTE(review): aes_string() and guides(fill = FALSE) are deprecated in
# newer ggplot2 releases; they presumably work under the renv-pinned
# version -- confirm before upgrading packages.
rc_plot <-
  function(
    df,
    measurement,
    variable,
    group,
    xlab,
    title,
    facet = NULL
  ) {
    rc_plot <-
    ggplot(
      df %>%
        filter(measure == measurement),
      aes_string(
        x = group,
        y = variable,
        fill = group,
        color = group
      )
    ) +
      # half-violin nudged to the side so points don't overlap it
      geom_flat_violin(position = position_nudge(x = .2,
                                                 y = 0),
                       adjust = 2) +
      geom_point(position = position_jitter(width = .15),
                 size = .10) +
      ylab("Rating (0-100)") +
      xlab(xlab) +
      coord_flip() +
      guides(fill = FALSE,
             color = FALSE) +
      scale_color_brewer(palette = "Dark2") +
      scale_fill_brewer(palette = "Dark2") +
      ggtitle(title)
    
    # optional faceting (e.g. by food type)
    if(!is.null(facet)){
      rc_plot <-
      rc_plot +
        facet_wrap(facet)
    }
    
    return(rc_plot)
  }

### FUNCTION 6 ###
# creates line plots
#
# df:       summary data frame that must contain an `se` column for the
#           error bars
# x_group:  x-axis variable name, passed as a string (aes_string)
# measure:  y-axis variable, passed unquoted (captured with enquo below)
# group_by: grouping variable name, passed as a string
#
# NOTE(review): `measure` is captured with enquo() but then handed to
# aes_string(), which expects character strings -- this mix of tidy
# evaluation and string aesthetics is fragile. It appears the intended
# calling convention is measure unquoted and the other two as strings;
# confirm against the call sites before refactoring.
line_plot <-
  function(
    df,
    x_group,
    measure,
    group_by
  ) {
    
    measure <- enquo(measure)
    
    ggplot(df,
           aes_string(
             x = x_group,
             y = measure,
             group = group_by,
             color = group_by,
             linetype = group_by
           )
    ) +
      geom_line() +
      geom_point() +
      # error bars use the quosure directly: mean +/- standard error
      geom_errorbar(
        aes(
          ymin = !! measure - se,
          ymax = !! measure + se),
        width = .05) +
      scale_color_brewer(palette = "Dark2")
  }

### FUNCTION 7 ###
# Proportion of a model's scaled residuals whose absolute value exceeds a
# cut-off (e.g. 2, 2.5, or 3 standard deviations). Note: counts residuals
# ABOVE the cut-off (the original inline comment said "below", which did
# not match the code).
proportion_residuals <-
  function(model, cut_off){
    scaled_resid <- abs(resid(model, scaled = TRUE))
    # the mean of a logical vector is the proportion of TRUEs
    mean(scaled_resid > cut_off)
  }

### FUNCTION 8 ###
# Tabulate the proportions of large scaled residuals (|r| > 2 / 2.5 / 3) for
# an lmer model, alongside rule-of-thumb maxima for how common such residuals
# should be, and print the result.
lmer_residuals <-
  function(lmer_model){
    
    cutoffs <- c(2, 2.5, 3)
    
    # observed proportion of scaled residuals exceeding each cut-off
    observed <- vapply(
      cutoffs,
      function(co) proportion_residuals(lmer_model, co),
      numeric(1)
    )
    
    # below_cutoff flags whether the observed proportion stays under the
    # rule-of-thumb maximum for that cut-off
    resid_proportions <-
      tibble(
        standardized_cutoff = cutoffs,
        proportion_cutoff = c(.05, .01, .0001),
        proportion = observed,
        below_cutoff = proportion < proportion_cutoff
      )
    
    print(resid_proportions)
  }

### FUNCTION 8 ###
# function takes the formal outlier values on cook's distance from an lmer object and arranges them from highest to lowest
#
# outliers:        presumably an influence.ME "estex" object (output of
#                  influence.ME::influence) -- confirm at the call sites
# grouping_factor: name for the rownames column created by as_tibble();
#                  NOTE(review): as_tibble(rownames = ) expects a single
#                  string, hence the as.character() coercion -- verify the
#                  callers pass a scalar
# Returns the six largest Cook's distances (head() default) in descending
# order; the distance column arrives as V1 and is renamed.
arrange_cook <-
  function(outliers, grouping_factor){
    cooks.distance(outliers) %>% 
      as_tibble(rownames = as.character(grouping_factor)) %>%
      rename(cooks_distance = V1) %>% 
      arrange(desc(cooks_distance)) %>% 
      head()
  }

1. Load and wrangle data

1.1 Load data

First, we load the data. For a codebook, see the [enter file name] file. Note that the Qualtrics output format is quite the nightmare for importing; found a solution here.

# Qualtrics CSVs carry the variable names plus two metadata rows at the top,
# so read the header row on its own first, then the data with those names.
headers <- read_csv(
  here("data", "study2", "raw_data.csv"),
  col_names = FALSE,
  n_max = 1) %>%
  as.character() # variable names stored in a vector

raw_data <- read_csv(
  here("data", "study2", "raw_data.csv"),
  col_names = headers, # use that name vector here to name variables
  na = "",
  skip = 3 # those are the three rows that contain the variable info
  )

rm(headers) # clear names from workspace

nrow(raw_data) # how many people opened the survey
## [1] 183

The data frame is extremely wide; we have 1804 variables. This has several reasons:

  • Qualtrics data are generally in the wide format, so each stimulus gets a column rather than a row
  • What label type was assigned to which food type for desire ratings was counterbalanced, such that there were four sets, each containing 40 variables (= 160 total)
  • Participants only answered one set, but all four sets are contained in the data, meaning that participants will have entries for one set of variables, but only NA for the other sets
  • Each stimulus was timed, meaning Qualtrics measured four additional variables per stimulus: first click, last click, number of clicks, and after how much time the page was submitted
  • This means there are 160 (the stimuli) plus 160 (the four sets) x 4 (timing variables) = 640 variables just for desire ratings (total 800 desire variables)
  • This entire procedure also applies to simulation ratings, but simulations had two rating items, meaning there will be 160 stimuli x 2 (number of items) + 160 x 4 (timing questions) = 960 doubling the number of stimulus rating variables (1600 total rating variables)
  • The remaining 44 are demographic etc. variables

1.2 Wrangling

We need to get these data into the long format. Before that, let’s only keep those variables we need and simultaneously give them proper names. Also, the response_id already is a unique identifier, but not easy to read. I’ll add a participant number (pp).

Also, when reading in the raw data, read_csv gave a warning that there were duplicate column names. Namely, timing variables of the pb_1_s_choice_time trial are double. The second instance of those duplicate timing variables have been assigned a _1 appendix. I checked the Qualtrics survey and the raw data and there’s a typo in one of the variable names: The timing variables for the pb_2_s_choice trial are called pb_1_s_choice. Thus, pb_1_s_choice_time_X (X stands for the timing variable) should be pb_2_s_choice_time_X. pb_1_s_choice_time_X_1 should be stripped of the _1 suffix. I’ll change that manually below.

Last, because I’m cleaning the raw data, I’ll use a new data object: working_file

# Keep only the variables we need (renaming them on the way), drop the
# click-count timing variables, fix the duplicated pb_1_s_choice timing
# names described above, and add a readable participant id (pp).
working_file <- 
  raw_data %>% 
  select(
    start_date = StartDate,
    end_date = EndDate,
    preview = Status,
    duration = `Duration (in seconds)`,
    finished = Finished,
    fullfil_criteria = InclusionConfirm,
    consent = ConsentProceed,
    group = CONDITION, # counterbalancing id
    hungry = HungerThirst_1,
    thirsty = HungerThirst_2,
    desire_instructions_time = `DInstructionsTime_Page Submit`,
    pb_1_n_choice_1:`mb_40_n_choice_Click Count`, # all desire variables
    simulations_instructions_time = `SimInstructionsTime_Page Submit`,
    pb_1_n_sim_1:`mb_40_n_sim_time_Click Count`, # all simulation variables
    age = Age,
    gender = Gender,
    height_choice = HeightUnit,
    height_cm = HeightCm_1,
    height_feet = HeightFI_1,
    height_inches = HeightFI_2,
    weight_choice = WeightUnit,
    weight_kilos = WeightKg_1,
    weight_stones = WeightStones_1,
    weight_pounds = WeightStones_2,
    weight_pounds_only = WeightPounds_1,
    diet = Diet,
    diet_details = DietOther,
    meat_per_week = meat_frequency,
    change_diet = less_meat_intention_1,
    allergies = Allergies,
    allergies_details = AllergyDetails,
    language_issues = LanguageComprehensio,
    language_issues_details = LanguageDifficulties,
    didnt_like = FoodPreference,
    study_about = StudyAim,
    technical_issues = TechnicalDifficultie,
    -contains("Click"), # omit click count variables
  ) %>%
  rename(
    `pb_2_s_choice_time_Page Submit` = `pb_1_s_choice_time_Page Submit`,
    `pb_1_s_choice_time_Page Submit`= `pb_1_s_choice_time_Page Submit_1`
  ) %>% 
  mutate(pp = paste0("pp_", row_number())) %>%
  select(# add participant number and set it as first column
    pp,
    everything()
  )

When inspecting the data and variable names visually, I see that one section of timing variable wasn’t named after the trial. The timing variable for pb_6_c_choice begins with Q725. I rename that timing variable.

# One timing variable kept its auto-generated Qualtrics name (Q725) instead
# of the trial name; rename it to match the pb_6_c_choice trial.
working_file <-
  working_file %>% 
  rename(
    pb_6_c_choice_time = `Q725_Page Submit`
  )

Next, I assign the correct variable type. In addition, I exported the Qualtrics data with numbers as output, so I will reintroduce the factor levels for factor variables.

# Convert the categorical variables to factors and reintroduce their labels
# (the Qualtrics export contains numeric codes only).
# NOTE(review): mutate_at() and list(~ ...) are the superseded scoped-verb
# idioms (across() replaces them); kept as-is to match the renv-pinned
# dplyr version.
working_file <-
  working_file %>% 
  mutate_at(
    vars(
      pp,
      preview,
      finished:group,
      gender,
      height_choice,
      weight_choice,
      diet,
      allergies,
      language_issues
    ),
    list(~ as.factor(.))
  ) %>% 
  mutate(
    preview = fct_recode(
      preview,
      yes = "1",
      no = "0"
    ),
    finished = fct_recode(
      finished,
      no = "0",
      yes = "1"
    ),
    gender = fct_recode(
      gender,
      male = "1",
      female = "2",
      other = "3"
    ),
    diet = fct_recode(
      diet,
      omnivore = "1",
      pescatarian = "2",
      vegetarian = "3",
      vegan = "4",
      other = "5"
    ),
    height_choice = fct_recode(
      height_choice,
      cm = "1",
      feet_inches = "2"
    ),
    weight_choice = fct_recode(
      weight_choice,
      kg = "1",
      stones_pounds = "2",
      pounds_only = "3"
    )
  ) %>% 
  # these four share the same yes/no coding, so recode them together
  mutate_at(
    vars(
      fullfil_criteria,
      consent,
      allergies,
      language_issues
    ),
    list(
      ~ fct_recode(
        .,
        yes = "1",
        no = "2"
      )
    )
  )

Because the study was conducted in the UK, we need to transform height and weight to cm and kg, respectively.

# Unify units: imperial heights/weights are converted with the helper
# functions defined above; metric entries pass through unchanged.
# stones_pounds_to_kg() handles the pounds-only case via its optional
# second argument.
working_file <-
  working_file %>% 
  mutate(
    height_cm = case_when(
      height_choice == "cm" ~ height_cm,
      height_choice == "feet_inches" ~ feet_inches_to_cm(height_feet, height_inches)
    ),
    weight_kilos = case_when(
      weight_choice == "kg" ~ weight_kilos,
      weight_choice == "stones_pounds" ~ stones_pounds_to_kg(weight_pounds, weight_stones),
      weight_choice == "pounds_only" ~ stones_pounds_to_kg(weight_pounds_only) 
    )
  )

Now that the data are cleaned, I can finally transform them to the long format. Currently, the measurements of desire and simulations are in the following format: pb_1_n_choice_1. The components (separated by underscores) of that format mean:

  • food type: pb (plant-based) or mb (meat-based)
  • stimulus number
  • label type: n (neutral), s (sensory), c (contextual), h (health-positive)
  • measurement/DV: choice (desire) or sim (simulations)
  • Number of the item/time: always _1 for desire items, _1 and _2 for simulation items

Let’s remove the Page Submit appendices from their variable names. Some trials don’t have a time suffix before the Page Submit suffix, because the time suffix wasn’t added during coding. Instead, they have a format like this pb_7_s_sim_Page Submit. Therefore, I first remove Page Submit from all variables that already have a time identifier in the name, and then replace Page Submit with time for those who don’t have a time identifier already.

# Strip the "Page Submit" appendix from the timing variables: first remove it
# where the name already ends in "time", then replace it with "time" where
# the time identifier was missing (order matters -- the first rename_at must
# run before the second can match the remaining names).
working_file <-
  working_file %>% 
    rename_at(
    vars(
      ends_with("time_Page Submit")
    ),
    list(
      ~ str_replace(
        ., "_Page Submit", ""
      )
    )
  ) %>% 
  rename_at(
    vars(
      ends_with("Page Submit")
    ),
    list(
      ~ str_replace(
        ., "Page Submit", "time"
      )
    )
  )

Here, I make sure there are no more “Page Submits” in any of the variable names. Weirdly, Qualtrics gave one variable a _1 appendix, which is why the previous command missed the Page Submit. This variable seems to be a duplicate: pb_1_s_choice has two Page Submit variables, but only one of them (with the _1 appendix) is on the same line as the scores on pb_1_s_choice_1. My guess is that this variable is left over from a test run, after which the Qualtrics survey was changed slightly. I’ll remove the double Page Submit and maintain and rename the correct one.

# check: no variable names should still contain "Page Submit"
working_file %>% 
  select(contains("Page Submit")) %>% 
  names()
## character(0)
# inspect that trial: the duplicate was dropped, one timing variable remains
working_file %>% 
  select(starts_with("pb_1_s_choice"))
## # A tibble: 183 x 2
##    pb_1_s_choice_1 pb_1_s_choice_time
##              <dbl>              <dbl>
##  1              NA              NA   
##  2              NA              NA   
##  3              NA              NA   
##  4              72               1.66
##  5              NA              NA   
##  6              NA              NA   
##  7              NA              NA   
##  8              NA              NA   
##  9              NA              NA   
## 10              NA              NA   
## # ... with 173 more rows

1.3 Tidy data

Let’s get to turning the data into the long format. Because we used four sets of stimuli (counterbalancing what stimulus belongs to what food type and label type), one quarter of the participants gave responses to a quarter of the measurement variables; the other three quarters of participant gave responses to the other three quarters.

Therefore, participants will have missing values on those variables that belonged to the other sets (i.e., the other counterbalancing conditions). Luckily, the pivot_longer function is amazing, so we can drop those measurements per participant with the values_drop_na argument. This command has the nice side effect of also excluding those who did not consent to participate (because they have NAs everywhere).

In their current form, the trial variable names contain information about food type, stimulus number, label type, and the measure (choice = desire vs. simulations). Moreover, the last appendix to the variable name (_1/_2/time) tells us whether the variable contains a rating for the first item or second item or the timing variable. I first turn these data into the long format, such that the new type variable tells us what the value variable is for.

Then, I spread the type and value variables, such that we follow tidy conventions: Each row is one observation (aka trial). A trial where desire was measured gets its own row and a trial where simulations were measured gets its own row. Both rows will contain the value of the rating on either one item (desire) or two items (simulations), plus a value for time on that trial.

# Reshape to tidy long format: one row per trial, with the variable-name
# components (food type, stimulus number, label type, measure, item/time)
# split out into columns, then the item/time types spread back into columns.
working_file <- 
  
  # turn into long format
  working_file %>% 
  pivot_longer(
    cols = c(contains("pb_"), contains("mb_")),
    names_to = c( # specify the variables to be formed from the current variable names
      "food_type", 
      "stimulus_no",
      "label_type",
      "measure",
      "type" # what type is the measurement, item number 1, item number 2, or time?
      ),
    values_to = c( # the values, which will be item number (1 or 2) and the timer per trial
      "value"
      ),
    names_sep = "_",
    values_drop_na = TRUE # do not include empty entries due to counterbalancing as rows in the long format
  ) %>%
  
  # then spread the type and value variables
  pivot_wider(
    names_from = "type",
    values_from = "value"
  ) %>% 

  # give proper variable names, labels, and variable types
  mutate(
    stimulus_no = as.numeric(stimulus_no)
  ) %>% 
  mutate(
    food_type = fct_recode(
      as.factor(food_type),
      meat_based = "mb",
      plant_based = "pb",
    ), # n (neutral), s (sensory), c (contextual), h (health-positive)
    label_type = fct_recode(
      as.factor(label_type),
      contextual = "c",
      health_positive = "h",
      neutral = "n",
      sensory = "s"
    ),
    measure = fct_recode(
      as.factor(measure),
      desire = "choice",
      simulations = "sim"
    )
  ) %>%
  rename(
    rating1 = "1",
    rating2 = "2"
  ) %>% 
  select( # arbitrary, but I like to order the variables roughly in the order they were collected
    pp:simulations_instructions_time,
    food_type:rating1,
    rating2,
    time,
    everything()
  )

Last, I load the food items and add them to the working file.

# Load the food item names (factor, numeric, factor columns) and join them
# onto the trials by food type and stimulus number.
foods <- 
  read_csv(
    here("data", "study2", "food_items.csv"),
    col_types = list("f", "n", "f")
  )

working_file <- 
  left_join(
    working_file,
    foods,
    by = c("food_type", "stimulus_no")
  ) %>% 
  mutate( # joining turned food type into character
    food_type = as.factor(food_type)
  ) %>% 
  select(
    pp:stimulus_no,
    food,
    everything()
  )

Alright, the data are in a tidy format. Below, I show the data of a random participant for illustration.

2. Exclusions

2.1 Exclude test runs

There are still a couple of test runs left. We exclude them by deselecting preview entries.

# before excluding testruns
length(unique(working_file$pp))
## [1] 177
# keep only real responses (preview == "no" drops Qualtrics preview runs)
working_file <- 
  working_file %>% 
  filter(preview == "no")

# after exclusion
length(unique(working_file$pp))
## [1] 173

2.2 Exclude incomplete surveys

Now exclude those who didn’t finish the survey.

# exclude participants who did not finish the survey (173 -> 171)
length(unique(working_file$pp))
## [1] 173
working_file <-
  working_file %>% 
  filter(finished == "yes")

length(unique(working_file$pp))
## [1] 171

Out of experience, Qualtrics isn’t very good at determining whether a survey is finished or not. We double check this: If indeed all participants gave all ratings, there should be 80 rows for each of the remaining 171 participants: 40 desire rows and 40 simulations rows (the simulation rows have two ratings). Furthermore, there should be few, if any, NAs remaining on any of the ratings. And indeed, there are no unexpected missing values. The missings on rating2 result from half the rows (the desire trials) only having one item, so 171 participants x 40 desire rows = 6840

# does each participant have 80 rows? (40 desire + 40 simulation trials)
working_file %>% 
  group_by(pp) %>% 
  count() %>%
  ungroup() %>% 
  summarize(
    all_there = sum(n == 80) == length(unique(working_file$pp))
  )
## # A tibble: 1 x 1
##   all_there
##   <lgl>    
## 1 TRUE
# are there NAs left on the rating or timing variables?
# (rating2 NAs are expected: desire trials have only one rating item)
working_file %>% 
  select(rating1, rating2, time) %>% 
  summarize_all(
    list(~ sum(is.na(.)))
  )
## # A tibble: 1 x 3
##   rating1 rating2  time
##     <int>   <int> <int>
## 1       0    6840     0

2.3 Response patterns

Next I check for response patterns (aka those who were “straightlining” and have little to no variance in their ratings.)

Participant pp_76 has zero variance on their ratings, which is suspicious. Three more participants have an SD of less than two, which is rather low for a 100 VAS scale. Two participants gave the same response on 90% of trials.

# Per participant and measure: mean, sd, min, and max of both ratings.
# The unnamed function list makes summarize_at suffix the columns fn1..fn4
# (fn1 = mean, fn2 = sd, fn3 = min, fn4 = max), hence rating1_fn2 below.
sd_per_group <- 
  working_file %>% 
  group_by(pp, measure) %>% 
  summarize_at(
    vars(
      rating1, rating2
    ),
    list(mean, sd, min, max)
  )

# check lowest sd for rating 1 (fn2 = sd)
sd_per_group %>%  
  arrange(rating1_fn2)
## # A tibble: 342 x 10
## # Groups:   pp [171]
##    pp    measure rating1_fn1 rating2_fn1 rating1_fn2 rating2_fn2
##    <fct> <fct>         <dbl>       <dbl>       <dbl>       <dbl>
##  1 pp_76 simula~        95          95          0           0   
##  2 pp_1~ simula~        59.7        59.7        1.47        1.49
##  3 pp_1~ simula~        76.4        76.4        1.72        1.46
##  4 pp_64 simula~        65.1        65.0        1.95        1.99
##  5 pp_11 simula~        67.2        59.5        3.15        5.27
##  6 pp_1~ simula~        96.6        91.0        3.51        9.43
##  7 pp_22 simula~        93.3        28.9        4.10       24.4 
##  8 pp_1~ simula~        79.9        78.8        4.16        7.25
##  9 pp_1~ simula~        76.5        27.6        4.78       10.0 
## 10 pp_41 simula~        96.0        50.8        5.39       35.8 
## # ... with 332 more rows, and 4 more variables: rating1_fn3 <dbl>,
## #   rating2_fn3 <dbl>, rating1_fn4 <dbl>, rating2_fn4 <dbl>
# check lowest sd for rating 2 (fn2 = sd, see sd_per_group above)
sd_per_group %>% 
  arrange(rating2_fn2)
## # A tibble: 342 x 10
## # Groups:   pp [171]
##    pp    measure rating1_fn1 rating2_fn1 rating1_fn2 rating2_fn2
##    <fct> <fct>         <dbl>       <dbl>       <dbl>       <dbl>
##  1 pp_76 simula~        95          95          0           0   
##  2 pp_1~ simula~        76.4        76.4        1.72        1.46
##  3 pp_1~ simula~        59.7        59.7        1.47        1.49
##  4 pp_64 simula~        65.1        65.0        1.95        1.99
##  5 pp_1~ simula~        89.4        92.3       15.6         4.93
##  6 pp_11 simula~        67.2        59.5        3.15        5.27
##  7 pp_96 simula~        60.8        54.9        5.66        5.33
##  8 pp_1~ simula~        67.5        68.3        6.39        6.48
##  9 pp_71 simula~        88          87.8        6.92        6.81
## 10 pp_1~ simula~        79.9        78.8        4.16        7.25
## # ... with 332 more rows, and 4 more variables: rating1_fn3 <dbl>,
## #   rating2_fn3 <dbl>, rating1_fn4 <dbl>, rating2_fn4 <dbl>
# check whether anyone selected a VAS value more than 90% of the time
# (more than 36 of the 40 trials per measure)
working_file %>% 
  group_by(pp, measure) %>% 
  count(rating1) %>% 
  filter(n > 40*0.9)
## # A tibble: 2 x 4
## # Groups:   pp, measure [2]
##   pp     measure     rating1     n
##   <fct>  <fct>         <dbl> <int>
## 1 pp_168 simulations      60    38
## 2 pp_76  simulations      95    40
# same 90% check for rating2 (simulation trials only, since desire trials
# have no second rating)
working_file %>%
  filter(measure == "simulations") %>% 
  group_by(pp) %>% 
  count(rating2) %>% 
  filter(n > 40*0.9)
## # A tibble: 2 x 3
## # Groups:   pp [2]
##   pp     rating2     n
##   <fct>    <dbl> <int>
## 1 pp_168      60    37
## 2 pp_76       95    40

I’ll inspect those four participants more closely by plotting their responses. Based on the response patterns, it looks like the four participants ran out of patience during the second part of the study. Their responses have a healthy spread when rating desire, but gets extremely narrow when rating the two simulation ratings. I think these respondents might just have been clicking through, especially pp_76 who selected the maximum value on the scale.

# density of rating1 for the four flagged participants, split by measure
ggplot(working_file %>% filter(pp %in% c("pp_76", "pp_168", "pp_164", "pp_64")), aes(x = rating1)) +
  geom_density(color = "darkgrey", fill = "darkgrey") +
  geom_point(aes(x = rating1, y = 0)) +
  facet_grid(measure ~ pp)

# density of rating2 (simulations only) for the same four participants
ggplot(working_file %>% filter(pp %in% c("pp_76", "pp_168", "pp_164", "pp_64")) %>% filter(measure == "simulations"),
       aes(x = rating2)) +
  geom_density(color = "darkgrey", fill = "darkgrey") +
  geom_point(aes(x = rating2, y = 0)) +
  facet_wrap(~ pp)

Based on the graph and the info above, I’ll exclude pp_76 in line with the preregistration.

# exclude pp_76 (zero-variance responder) per the preregistration
working_file <- 
  working_file %>% 
  filter(!pp %in% c("pp_76"))

2.4 Don’t fulfill inclusion

Here we check whether participants indeed fulfill the inclusion criteria. The criteria were:

  • living in the UK
  • 18-70 years old
  • omnivores
  • no current or history of eating disorder
  • not on a diet

Eight participants indicate that they’re not omnivores, despite saying so in the intro. When we look at their meat consumption, all but two eat meat despite not calling themselves omnivores. It’s hard to tell what to do with those two participants: they said they fulfilled the inclusion criteria, but apparently don’t eat meat that often. I’ll run the analyses with and without them (pp_77 and pp_87).

# check whether participants themselves said they were fulfilling the criteria
# (one row per participant before tabulating)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  pull(fullfil_criteria) %>% 
  table()
## .
## yes  no 
## 170   0
# check age range (inclusion criterion: 18-70 years)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  summarize(
    lower_range = min(age),
    upper_range = max(age)
  )
## # A tibble: 1 x 2
##   lower_range upper_range
##         <dbl>       <dbl>
## 1          18          68
# check diet (inclusion criterion: omnivores)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  count(diet)
## # A tibble: 4 x 2
##   diet            n
##   <fct>       <int>
## 1 omnivore      162
## 2 pescatarian     3
## 3 vegetarian      3
## 4 other           2
# let's check their meat consumption (non-omnivores only)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  filter(diet != "omnivore") %>% 
  select(pp, diet, diet_details, meat_per_week)
## # A tibble: 8 x 4
##   pp     diet        diet_details               meat_per_week
##   <fct>  <fct>       <chr>                              <dbl>
## 1 pp_172 other       Plant based                          1  
## 2 pp_26  vegetarian  <NA>                                 4  
## 3 pp_36  pescatarian <NA>                                 2  
## 4 pp_39  pescatarian <NA>                                 0.2
## 5 pp_60  pescatarian <NA>                                 2  
## 6 pp_77  vegetarian  <NA>                                 0  
## 7 pp_82  other       I basically eat everything          14  
## 8 pp_87  vegetarian  <NA>                                 0

2.5 Check for rushed responses

Next, I want to see whether any participants rushed through the survey (not preregistered). If anyone spent only a half a second per trial, I wouldn’t take their data seriously. The fastest participant still spent about three seconds on each trial, which I don’t find problematic.

# mean and sd of the per-trial response times, per participant, sorted
# fastest-first
time_means <- 
  working_file %>% 
  group_by(pp) %>% 
  summarize(
    mean_rt = mean(time),
    sd_srt = sd(time)
  ) %>% 
  arrange(mean_rt)

# plot response time
time_means %>% 
  ggplot(aes(x = mean_rt)) +
  geom_density(color = "darkgrey", fill = "darkgrey") +
  geom_point(aes(x = mean_rt, y = 0))

3. Describe and visualize

3.1 Meta-data

How long did participants spend on the survey? Median duration is around ~ 15 minutes.

# survey duration: trait-like (repeated_measure = FALSE), with summary table
describe_visualize(
  working_file,
  duration,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.700000e+02
## nbr.null        0.000000e+00
## nbr.na          0.000000e+00
## min             4.560000e+02
## max             2.736000e+03
## range           2.280000e+03
## sum             1.709590e+05
## median          9.060000e+02
## mean            1.005641e+03
## SE.mean         2.855578e+01
## CI.mean.0.95    5.637197e+01
## var             1.386235e+05
## std.dev         3.723218e+02
## coef.var        3.702332e-01
## 
## [[2]]

How long did they spend on the instructions for the desire and simulation ratings, respectively? Overall not long (~ 15 seconds), but some apparently left the browser window open for quite some time.

# time spent on the desire-rating instructions (trait-like, with summary)
describe_visualize(
  working_file,
  desire_instructions_time,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          170.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.1110000
## max              212.4570000
## range            211.3460000
## sum             3532.7820000
## median            17.7360000
## mean              20.7810706
## SE.mean            1.5694284
## CI.mean.0.95       3.0982094
## var              418.7279556
## std.dev           20.4628433
## coef.var           0.9846867
## 
## [[2]]

# time spent on the simulation-rating instructions (trait-like, with summary)
describe_visualize(
  working_file,
  simulations_instructions_time,
  FALSE,
  TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          170.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min                1.5970000
## max              112.2710000
## range            110.6740000
## sum             2985.1670000
## median            13.7355000
## mean              17.5598059
## SE.mean            1.1825514
## CI.mean.0.95       2.3344752
## var              237.7327190
## std.dev           15.4185836
## coef.var           0.8780612
## 
## [[2]]

Were there language issues? Barely, and the qualitative answers don’t give cause for concern.

# frequency table of self-reported language issues (one count per trial row)
my_table(working_file, language_issues)
## .
## yes  no 
##   2 168
# inspect the free-text details of the participants who reported language
# issues; group_by + slice(1) keeps one row per participant
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  filter(language_issues == "yes") %>% 
  pull(language_issues_details)
## [1] "mistake ment to click no" "none"

3.2 Demographic information

First, I look at hunger and thirst ratings. Participants seemed to be more thirsty than hungry.

# summary statistics and density plot for hunger ratings
describe_visualize(
  df = working_file,
  variable = hungry,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          170.0000000
## nbr.null          15.0000000
## nbr.na             0.0000000
## min                0.0000000
## max              100.0000000
## range            100.0000000
## sum             7482.0000000
## median            50.5000000
## mean              44.0117647
## SE.mean            2.1713893
## CI.mean.0.95       4.2865405
## var              801.5383223
## std.dev           28.3114521
## coef.var           0.6432701
## 
## [[2]]

# summary statistics and density plot for thirst ratings
describe_visualize(
  df = working_file,
  variable = thirsty,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          170.0000000
## nbr.null           5.0000000
## nbr.na             0.0000000
## min                0.0000000
## max              100.0000000
## range            100.0000000
## sum             8731.0000000
## median            58.0000000
## mean              51.3588235
## SE.mean            1.8268515
## CI.mean.0.95       3.6063884
## var              567.3556909
## std.dev           23.8192294
## coef.var           0.4637807
## 
## [[2]]

What is the gender distribution in our sample? Mostly women.

# frequency table of gender
my_table(working_file, gender)
## .
##   male female  other 
##     56    113      1

Age distribution.

# summary statistics and density plot for age
describe_visualize(
  df = working_file,
  variable = age,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          170.0000000
## nbr.null           0.0000000
## nbr.na             0.0000000
## min               18.0000000
## max               68.0000000
## range             50.0000000
## sum             5390.0000000
## median            29.0000000
## mean              31.7058824
## SE.mean            0.8435850
## CI.mean.0.95       1.6653216
## var              120.9780717
## std.dev           10.9990032
## coef.var           0.3469073
## 
## [[2]]

What’s the height distribution? One participant indicated 1.76 cm height, which clearly is meant to be 176. I change that manually. Another participant didn’t want to provide their height (has both height and weight at 0), so I’ll set those to NA.

# summary statistics and density plot for height (before cleaning)
describe_visualize(
  df = working_file,
  variable = height_cm,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.700000e+02
## nbr.null        1.000000e+00
## nbr.na          0.000000e+00
## min             0.000000e+00
## max             1.960000e+02
## range           1.960000e+02
## sum             2.869976e+04
## median          1.700000e+02
## mean            1.688221e+02
## SE.mean         1.621308e+00
## CI.mean.0.95    3.200625e+00
## var             4.468689e+02
## std.dev         2.113927e+01
## coef.var        1.252163e-01
## 
## [[2]]

# inspect implausible heights (< 150 cm), one row per participant, together
# with the units each participant chose and their weight
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  filter(height_cm < 150) %>% 
  select(pp, height_choice, height_cm, weight_choice, weight_kilos)
## # A tibble: 4 x 5
##   pp     height_choice height_cm weight_choice weight_kilos
##   <fct>  <fct>             <dbl> <fct>                <dbl>
## 1 pp_149 cm               120    stones_pounds           66
## 2 pp_66  cm                 0    kg                       0
## 3 pp_73  cm                 1.76 kg                      78
## 4 pp_92  cm               149    stones_pounds           58
# recode one participant and set the other one to NA
# NOTE(review): `height_cm == 1.76` is an exact floating-point comparison;
# it works here because the value was entered as exactly 1.76, but
# dplyr::near() would be more defensive
working_file <-
  working_file %>% 
  mutate(
    height_cm = case_when(
      height_cm == 1.76 ~ 176,
      height_cm == 0 ~ NA_real_,
      TRUE ~ height_cm
    ),
    # a weight of 0 means the participant declined to answer
    weight_kilos = if_else(weight_kilos == 0, NA_real_, weight_kilos)
  )

# re-describe height after recoding/NA-setting
describe_visualize(
  df = working_file,
  variable = height_cm,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.690000e+02
## nbr.null        0.000000e+00
## nbr.na          1.000000e+00
## min             1.200000e+02
## max             1.960000e+02
## range           7.600000e+01
## sum             2.887400e+04
## median          1.700000e+02
## mean            1.708521e+02
## SE.mean         8.064551e-01
## CI.mean.0.95    1.592092e+00
## var             1.099125e+02
## std.dev         1.048392e+01
## coef.var        6.136250e-02
## 
## [[2]]

Let’s look at the weight. There’s one participant who indicated they weigh 38 kilos. Most participants who weigh less than 50 kg also have below-average height, so I’m reluctant to set values to NA here.

# summary statistics and density plot for weight
describe_visualize(
  df = working_file,
  variable = weight_kilos,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         1.690000e+02
## nbr.null        0.000000e+00
## nbr.na          1.000000e+00
## min             3.800000e+01
## max             1.320000e+02
## range           9.400000e+01
## sum             1.248230e+04
## median          7.200000e+01
## mean            7.385976e+01
## SE.mean         1.347448e+00
## CI.mean.0.95    2.660112e+00
## var             3.068391e+02
## std.dev         1.751682e+01
## coef.var        2.371633e-01
## 
## [[2]]

# inspect participants weighing under 50 kg alongside their height, one row
# per participant
working_file %>% 
  filter(weight_kilos < 50) %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  select(pp, weight_kilos, height_cm)
## # A tibble: 8 x 3
##   pp     weight_kilos height_cm
##   <fct>         <dbl>     <dbl>
## 1 pp_105         43.8       168
## 2 pp_148         46         160
## 3 pp_16          47         164
## 4 pp_168         45         160
## 5 pp_18          45         163
## 6 pp_20          38         157
## 7 pp_22          47         157
## 8 pp_77          48         152

Next, some information about their diet.

  • participants seem to be divided on whether they’re trying to change their diet
  • frequent meat eaters
  • a few allergies
# how often participants eat meat per week
describe_visualize(
  df = working_file,
  variable = meat_per_week,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val          169.0000000
## nbr.null           2.0000000
## nbr.na             1.0000000
## min                0.0000000
## max               16.0000000
## range             16.0000000
## sum             1222.2000000
## median             7.0000000
## mean               7.2319527
## SE.mean            0.2927428
## CI.mean.0.95       0.5779285
## var               14.4830206
## std.dev            3.8056564
## coef.var           0.5262281
## 
## [[2]]

# to what extent participants are currently trying to change their diets
describe_visualize(
  df = working_file,
  variable = change_diet,
  repeated_measure = FALSE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                           x
## -------------  ------------
## nbr.val          170.000000
## nbr.null           0.000000
## nbr.na             0.000000
## min                1.000000
## max              100.000000
## range             99.000000
## sum             6204.310000
## median            32.790000
## mean              36.495941
## SE.mean            2.428399
## CI.mean.0.95       4.793904
## var             1002.511055
## std.dev           31.662455
## coef.var           0.867561
## 
## [[2]]

# whether participants have food allergies
my_table(working_file, allergies)
## .
## yes  no 
##   7 163
# details of allergies: one row per participant, then keep only those who
# reported an allergy. ungroup() added for consistency with the parallel
# language-issues chunk (without it, the grouping leaks into the output,
# as the "# Groups: pp [7]" line in the rendered table shows)
working_file %>% 
  group_by(pp) %>% 
  slice(1) %>% 
  ungroup() %>% 
  filter(allergies == "yes") %>% 
  select(pp, allergies_details)
## # A tibble: 7 x 2
## # Groups:   pp [7]
##   pp     allergies_details        
##   <fct>  <chr>                    
## 1 pp_113 peanut allergy           
## 2 pp_16  Strawberry and kiwi fruit
## 3 pp_22  Dairy                    
## 4 pp_77  nut allergy              
## 5 pp_86  fizzy drinks             
## 6 pp_91  slight lactose intolerant
## 7 pp_93  Rice

3.3 Ratings

Next, I visualize the ratings on desire and simulations, respectively. I’ll first visualize and describe them overall, then per factor level (as in two main effects of label type and food type), and then the interaction.

3.3.1 Desire

First, I inspect the overall desire ratings, regardless of label or food type. The question wording might have invited participants to think about the answers almost in a binary way. We’ll need to check the model diagnostics later.

# overall desire ratings, collapsed over label and food type
describe_visualize(
  df = working_file %>% 
    filter(measure == "desire"),
  variable = rating1,
  repeated_measure = TRUE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         6.800000e+03
## nbr.null        7.070000e+02
## nbr.na          0.000000e+00
## min             0.000000e+00
## max             1.000000e+02
## range           1.000000e+02
## sum             3.560780e+05
## median          5.900000e+01
## mean            5.236441e+01
## SE.mean         3.905958e-01
## CI.mean.0.95    7.656901e-01
## var             1.037443e+03
## std.dev         3.220936e+01
## coef.var        6.151001e-01
## 
## [[2]]

Next, a raincloud plot with ratings per label type. Doesn’t look like there’s much of a difference, but hard to tell without means and SEs. The descriptives show small differences, in the range of two to three scale points.

# raincloud plot of desire ratings (rating1) per label type
rc_plot(
  working_file,
  "desire",
  "rating1",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Desire Ratings"
  )

# raw means and SDs (without taking grouping by PP into account)
working_file %>% 
  filter(measure == "desire") %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(rating1, na.rm = TRUE),
    sd = sd(rating1, na.rm = TRUE)
  )
## # A tibble: 4 x 3
##   label_type       mean    sd
##   <fct>           <dbl> <dbl>
## 1 contextual       52.4  32.7
## 2 health_positive  50.7  31.5
## 3 neutral          52.5  32.4
## 4 sensory          53.8  32.3
# means (unchanged) and SDs after aggregated per participant (to make it comparable to RM ANOVA)
working_file %>% 
  filter(measure == "desire") %>% 
  group_by(pp, label_type) %>%
  summarize(mean_agg = mean(rating1, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 4 x 3
##   label_type       mean    sd
##   <fct>           <dbl> <dbl>
## 1 contextual       52.4  15.2
## 2 health_positive  50.7  14.5
## 3 neutral          52.5  13.4
## 4 sensory          53.8  14.7

Let’s inspect the ratings per food type. People seem to desire the meat-based foods more.

rc_plot(
  working_file,
  "desire",
  "rating1",
  "food_type",
  "Food Type",
  "Raincloud Plot of Food Desire Ratings"
  )

# raw means and SDs
working_file %>% 
  filter(measure == "desire") %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(rating1, na.rm = TRUE),
    sd = sd(rating1, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   57.9  31.9
## 2 plant_based  46.8  31.5
# aggregated means and SDs
working_file %>% 
  filter(measure == "desire") %>% 
  group_by(pp, food_type) %>%
  summarize(mean_agg = mean(rating1, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   57.9  13.3
## 2 plant_based  46.8  15.3

Next, I inspect the interaction of label type and food type for desire ratings. The raincloud plots are not that easy to read unless I add CIs, etc., so for a quicker visualization I use a line plot on the aggregated means with within-subject standard errors (from the Rmisc::summarySEwithin function).

Just by inspecting the plot, it looks like there are two main effects, but no interactions: Meat-based foods are generally more desirable than plant-based foods and the differences between label types are very similar for each food type.

# raincloud plot of desire ratings per label type, split by food type
rc_plot(
  working_file,
  "desire",
  "rating1",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Desire Ratings",
  "food_type"
  )

# get summary statistics
# aggregate to one mean per pp x label_type x food_type cell first, then
# compute within-subject standard errors with Rmisc::summarySEwithin
summary_desire <-
  summarySEwithin(
    data = working_file %>%
      filter(measure == "desire") %>%
      group_by(pp, label_type, food_type) %>%
      summarize(rating1 = mean(rating1, na.rm = TRUE)),
    measurevar = "rating1",
    withinvars = c("label_type", "food_type"),
    idvar = "pp"
  )

# have a look at means and SDs
summary_desire
##        label_type   food_type   N  rating1       sd       se       ci
## 1      contextual  meat_based 170 57.63059 17.33745 1.329722 2.625004
## 2      contextual plant_based 170 47.25647 15.94229 1.222718 2.413768
## 3 health_positive  meat_based 170 56.06118 15.99569 1.226814 2.421853
## 4 health_positive plant_based 170 45.31059 15.50267 1.189001 2.347207
## 5         neutral  meat_based 170 58.45647 15.09830 1.157987 2.285982
## 6         neutral plant_based 170 46.57529 14.80140 1.135215 2.241029
## 7         sensory  meat_based 170 59.64118 15.71061 1.204949 2.378690
## 8         sensory plant_based 170 47.98353 14.83867 1.138074 2.246673
# line graph
# NOTE(review): rating1 is passed unquoted while the other arguments are
# strings — presumably line_plot() expects this mixed interface; confirm
# against its definition
line_plot(
  summary_desire,
  "label_type",
  rating1,
  "food_type"
)

Last, I want to see variance by pp and by food. The boxplots show that there is quite a lot of variation for both factors, which is why we should model them as random effects.

# boxplots of desire ratings per participant, to eyeball between-person
# variance
working_file %>%
  filter(measure == "desire") %>%
  ggplot(aes(x = pp, y = rating1)) +
  geom_boxplot()

# boxplots of desire ratings per food item, to eyeball between-food
# variance
working_file %>%
  filter(measure == "desire") %>%
  ggplot(aes(x = food, y = rating1)) +
  geom_boxplot()

3.3.2 Simulations

We had two items for simulations, rating1 and rating2. In the preregistration we said that we would take their mean (aka combine them into one rating) if they were highly correlated (i.e., > r = .60). So let’s check those correlations first.

They are right at the threshold that we specified. If we take the correlation between ratings on each trial, it’s .59. If we aggregate by participant first, it’s .63. I think together with the conceptual similarity, this is justification enough to combine them into one rating (plus, it makes it much easier to interpret one model compared to two models, one for each item).

# on the entire data (so per trial)
# NOTE(review): cor() returns NA if either rating contains missing values;
# this assumes the simulation ratings are complete
working_file %>% 
  filter(measure == "simulations") %>% 
  summarize(
    raw_r = cor(rating1, rating2)
  )
## # A tibble: 1 x 1
##   raw_r
##   <dbl>
## 1 0.588
# aggregating ratings per participant first
# NOTE(review): mean() here has no na.rm = TRUE, unlike elsewhere in the
# script — fine only if these ratings contain no NAs
working_file %>%
  filter(measure == "simulations") %>% 
  group_by(pp) %>% 
  summarize(
    rating1_mean = mean(rating1),
    rating2_mean = mean(rating2)
  ) %>% 
  summarize(
    aggregated_r = cor(rating1_mean, rating2_mean)
  )
## # A tibble: 1 x 1
##   aggregated_r
##          <dbl>
## 1        0.630
# combine them into one rating
working_file <- 
  working_file %>% 
  mutate(
    # rating3 = trial-level mean of the two simulation items
    rating3 = (rating1 + rating2) / 2
  ) %>% 
  select(
    # reorder columns so rating3 sits directly after rating2
    pp:rating2,
    rating3,
    everything()
  )

# inspect aggregated M and SD per condition
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(pp, label_type) %>%
  summarize(mean_agg = mean(rating3, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 4 x 3
##   label_type       mean    sd
##   <fct>           <dbl> <dbl>
## 1 contextual       61.2  15.3
## 2 health_positive  57.8  15.2
## 3 neutral          59.3  14.8
## 4 sensory          60.6  16.3

First, I inspect the overall simulation ratings, regardless of label or food type. Simulations, overall, have higher mean ratings than desire, and the ratings are less concentrated at the 0 and 100 endpoints.

# overall combined simulation ratings, collapsed over label and food type
describe_visualize(
  df = working_file %>% 
    filter(measure == "simulations"),
  variable = rating3,
  repeated_measure = TRUE,
  want_summary = TRUE
)
## [[1]]
## 
## 
##                            x
## -------------  -------------
## nbr.val         6.800000e+03
## nbr.null        2.480000e+02
## nbr.na          0.000000e+00
## min             0.000000e+00
## max             1.000000e+02
## range           1.000000e+02
## sum             4.062260e+05
## median          6.250000e+01
## mean            5.973912e+01
## SE.mean         3.183379e-01
## CI.mean.0.95    6.240419e-01
## var             6.891054e+02
## std.dev         2.625082e+01
## coef.var        4.394243e-01
## 
## [[2]]

Next, a raincloud plot with ratings per label type. Again, the differences between conditions are rather small.

rc_plot(
  working_file,
  "simulations",
  "rating3",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Simulation Ratings"
  )

# raw means and SDs (without taking grouping by PP into account)
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(rating3, na.rm = TRUE),
    sd = sd(rating3, na.rm = TRUE)
  )
## # A tibble: 4 x 3
##   label_type       mean    sd
##   <fct>           <dbl> <dbl>
## 1 contextual       61.2  25.9
## 2 health_positive  57.8  26.1
## 3 neutral          59.3  26.4
## 4 sensory          60.6  26.4
# means (unchanged) and SDs after aggregated per participant (to make it comparable to RM ANOVA)
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(pp, label_type) %>%
  summarize(mean_agg = mean(rating3, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(label_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 4 x 3
##   label_type       mean    sd
##   <fct>           <dbl> <dbl>
## 1 contextual       61.2  15.3
## 2 health_positive  57.8  15.2
## 3 neutral          59.3  14.8
## 4 sensory          60.6  16.3

Let’s inspect the ratings per food type. Again, participants seemed to simulate more for meat-based foods.

rc_plot(
  working_file,
  "simulations",
  "rating3",
  "food_type",
  "Food Type",
  "Raincloud Plot of Food Simulation Ratings"
  )

# raw means and SDs
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(rating3, na.rm = TRUE),
    sd = sd(rating3, na.rm = TRUE)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   63.2  25.6
## 2 plant_based  56.2  26.4
# aggregated means and SDs
working_file %>% 
  filter(measure == "simulations") %>% 
  group_by(pp, food_type) %>%
  summarize(mean_agg = mean(rating3, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(food_type) %>% 
  summarize(
    mean = mean(mean_agg),
    sd = sd(mean_agg)
  )
## # A tibble: 2 x 3
##   food_type    mean    sd
##   <fct>       <dbl> <dbl>
## 1 meat_based   63.2  14.2
## 2 plant_based  56.2  15.9

Next, line plots to inspect possible interactions. Again, the plot seems to show two main effects: meat-based foods elicit more simulations than plant-based foods, and the pattern for label type is similar. There might be a difference for sensory labels, which don’t increase simulations compared to neutral labels in the plant-based condition, but do in the meat-based condition.

rc_plot(
  working_file,
  "simulations",
  "rating3",
  "label_type",
  "Label Type",
  "Raincloud Plot of Food Simulation Ratings",
  "food_type"
  )

# get summary statistics
summary_simulations <-
  summarySEwithin(
    data = working_file %>%
      filter(measure == "simulations") %>%
      group_by(pp, label_type, food_type) %>%
      summarize(rating3 = mean(rating3, na.rm = TRUE)),
    measurevar = "rating3",
    withinvars = c("label_type", "food_type"),
    idvar = "pp"
  )

# have a look at means and SDs
summary_simulations
##        label_type   food_type   N  rating3       sd        se       ci
## 1      contextual  meat_based 170 64.42294 11.61811 0.8910681 1.759058
## 2      contextual plant_based 170 57.94765 10.25529 0.7865447 1.552718
## 3 health_positive  meat_based 170 61.02235 10.96733 0.8411555 1.660525
## 4 health_positive plant_based 170 54.59647 11.64130 0.8928472 1.762570
## 5         neutral  meat_based 170 62.68882 10.47310 0.8032500 1.585696
## 6         neutral plant_based 170 56.00235 10.56757 0.8104957 1.600000
## 7         sensory  meat_based 170 64.79412 10.84748 0.8319635 1.642380
## 8         sensory plant_based 170 56.43824 12.01869 0.9217913 1.819709
# line graph
line_plot(
  summary_simulations,
  "label_type",
  rating3,
  "food_type"
)

What’s the correlation between desire and simulations?

# trial-level correlation between desire and the combined simulation rating
# NOTE(review): left_join() without an explicit `by` joins on all shared
# columns (pp, food, food_type, label_type, stimulus_no) — verify this is
# the intended key
working_file %>%
  filter(measure == "desire") %>% 
  select(pp, food, food_type, label_type, stimulus_no, rating1) %>% 
  rename(desire = rating1) %>% 
  left_join(
    .,
    working_file %>% 
      filter(measure == "simulations") %>% 
      select(pp, food, food_type, label_type, stimulus_no, rating3) %>% 
      rename (simulations = rating3)
  ) %>% 
  summarize(
    r = cor(simulations, desire)
  )
## # A tibble: 1 x 1
##       r
##   <dbl>
## 1 0.470

Last, I want to see variance again by pp and by food. The boxplots show that there is quite a lot of variation for both factors, which is why we should model them as random effects.

# boxplots of simulation ratings per participant, to eyeball
# between-person variance
working_file %>%
  filter(measure == "simulations") %>%
  ggplot(aes(x = pp, y = rating3)) +
  geom_boxplot()

# boxplots of simulation ratings per food item, to eyeball between-food
# variance
working_file %>%
  filter(measure == "simulations") %>%
  ggplot(aes(x = food, y = rating3)) +
  geom_boxplot()

4. Analysis

Now I move on to the analysis. Note that I deviate from the preregistration here. The preregistration specified two repeated-measures ANOVAs, one for desire and one for simulations, as well as separate t-tests within the plant-based condition. I deviate by fitting mixed-effects models to account for variance by participant and by food item.

I’ll structure the analysis section into confirmatory (testing hypotheses) and exploratory (robustness checks and secondary predictions).

Here the hypotheses:

  1. For plant-based foods, sensory labels will lead to increased simulation ratings and desire compared to neutral labels.
  2. For plant-based foods, context labels will lead to increased simulation ratings and desire compared to neutral labels.
  3. For plant-based foods, health-positive labels will lead to similar simulation and desire ratings as neutral labels (Bayes factors supporting the null hypothesis of no difference between labels).
  4. Across label conditions, meat-based foods will be rated as more desirable than plant- based foods, especially for people who eat meat more often.
  5. Sensory and context labels will increase desire more for plant-based than for meat-based foods, compared to control labels.
  6. The intention to reduce meat will correlate with desire for the plant-based foods.

This is all quite complicated, so I’ll go model by model. The first three hypotheses can be tested in two models that predict desire and simulations with label_type, and then doing posthoc tests to compare the effects. For the fourth hypothesis I’ll run a model predicting desire with the interaction of food_type and meat_per_week. For the fifth hypothesis, I’ll predict desire with the interaction of label_type and food_type. For the sixth hypothesis, I’ll predict desire for plant-based foods with the intention to reduce eating meat.

Desire and simulations are conceptually related. That means we should control our error rate. In total, we’ll predict one of those two dependent measures in five models, so we’ll adjust our alpha to 0.01.

It’ll also be easier to work with the data if I separate the working file into two files, one for desire and one for simulations.

# split the working file by dependent measure
desire <- filter(working_file, measure == "desire")

simulations <- filter(working_file, measure == "simulations")

4.1 Main effect of labels on desire for plant-based foods

4.1.1 Confirmatory

First, I build the mixed-effects model for desire, predicted by label_type for plant-based foods only. I follow a maximum random effects structure. We counterbalanced which food was assigned to what label type across participants. Therefore, we will model label_type as random slope for the food grouping.

pp also contains each combination of label_type because the experiment was within-subjects. That means we need to include the effect of label_type as a random slope nested in pp. I use sum-to-zero contrasts because they help with model convergence.

The model has problems converging and gives a singular fit warning. It’s not clear where the model hits the boundary, as there are no random effects that are close to zero or one, except the correlation of the food intercept with label type 1, which is one. Inspecting the thetas shows that the random slope of one label is estimated very close to zero.

# set contrasts
# NOTE(review): options(contrasts = ...) changes contrasts globally for the
# rest of the session, not just for the next model
options(contrasts = c("contr.sum", "contr.poly")) 

# get a plants-only data set
desire_plants <- 
  desire %>% 
  filter(food_type == "plant_based") %>% 
  # drop the now-empty "meat_based" level from the factor
  mutate(food_type = droplevels(food_type))

# construct model
# maximal random-effects structure: random intercepts and label_type slopes
# (with correlations) both by participant and by food item; maxfun is
# raised so the optimizer does not stop early
h1_desire_model <- lme4::lmer(
  rating1 ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  )
)

# inspect model
summary(h1_desire_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating1 ~ label_type + (1 + label_type | pp) + (1 + label_type |  
##     food)
##    Data: desire_plants
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32474.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.85324 -0.73507 -0.00855  0.71937  2.71349 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  pp       (Intercept) 200.3955 14.1561                   
##           label_type1  43.5035  6.5957  -0.06            
##           label_type2  13.6059  3.6886   0.24 -0.28      
##           label_type3   8.7277  2.9543  -0.12 -0.93  0.52
##  food     (Intercept)  51.8187  7.1985                   
##           label_type1   0.1775  0.4213   1.00            
##           label_type2   5.9210  2.4333  -0.04 -0.04      
##           label_type3   7.4198  2.7239  -0.27 -0.27 -0.66
##  Residual             717.9239 26.7941                   
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  46.7826     1.9952  23.447
## label_type1   0.4616     0.9479   0.487
## label_type2  -1.4559     1.0049  -1.449
## label_type3  -0.2231     1.0276  -0.217
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1  0.064              
## label_type2  0.021 -0.267       
## label_type3 -0.142 -0.343 -0.383
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter
getME(h1_desire_model,"theta")
##               pp.(Intercept)   pp.label_type1.(Intercept) 
##                  0.528329415                 -0.013809304 
##   pp.label_type2.(Intercept)   pp.label_type3.(Intercept) 
##                  0.033533032                 -0.012960571 
##               pp.label_type1   pp.label_type2.label_type1 
##                  0.245775462                 -0.037301752 
##   pp.label_type3.label_type1               pp.label_type2 
##                 -0.103863829                  0.128202371 
##   pp.label_type3.label_type2               pp.label_type3 
##                  0.034636504                  0.001211042 
##             food.(Intercept) food.label_type1.(Intercept) 
##                  0.268660763                  0.015724315 
## food.label_type2.(Intercept) food.label_type3.(Intercept) 
##                 -0.003346533                 -0.027194595 
##             food.label_type1 food.label_type2.label_type1 
##                  0.000000000                 -0.089966750 
## food.label_type3.label_type1             food.label_type2 
##                  0.059725062                  0.011925883 
## food.label_type3.label_type2             food.label_type3 
##                 -0.065425424                  0.041807834
isSingular(h1_desire_model, tol = 1e-5)
## [1] TRUE

Therefore, it’s probably a good idea to remove random correlation within the food factor to see whether that helps with singularity. It doesn’t: Now the model cannot estimate a variance around the slope for label1 within food.

# construct model
# afex::lmer_alt expands the double-bar (||) syntax so the by-food random
# effects are estimated without correlations; by-pp correlations are kept
h1_desire_model_no_food_corr <- lmer_alt(
  rating1 ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type || food),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  )
)

# inspect model
summary(h1_desire_model_no_food_corr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 | pp) + (1 + re2.label_type1 + re2.label_type2 +  
##     re2.label_type3 || food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32476.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.86787 -0.74086 -0.00959  0.72017  2.74209 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr             
##  pp       (Intercept)     199.920  14.139                    
##           re1.label_type1  43.679   6.609   -0.05            
##           re1.label_type2  13.403   3.661    0.24 -0.28      
##           re1.label_type3   8.901   2.983   -0.12 -0.93  0.52
##  food     (Intercept)      51.869   7.202                    
##  food.1   re2.label_type1   0.000   0.000                    
##  food.2   re2.label_type2   3.177   1.782                    
##  food.3   re2.label_type3   4.010   2.002                    
##  Residual                 718.892  26.812                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  46.7816     1.9952  37.2186  23.447   <2e-16 ***
## label_type1   0.4639     0.9442 167.2478   0.491    0.624    
## label_type2  -1.4596     0.9339  32.1702  -1.563    0.128    
## label_type3  -0.2218     0.9420  31.1712  -0.235    0.815    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.015              
## label_type2  0.039 -0.286       
## label_type3 -0.016 -0.360 -0.202
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter
getME(h1_desire_model_no_food_corr,"theta")
##                     pp.(Intercept)     pp.re1.label_type1.(Intercept) 
##                         0.52734724                        -0.01305881 
##     pp.re1.label_type2.(Intercept)     pp.re1.label_type3.(Intercept) 
##                         0.03287394                        -0.01312642 
##                 pp.re1.label_type1 pp.re1.label_type2.re1.label_type1 
##                         0.24614686                        -0.03686695 
## pp.re1.label_type3.re1.label_type1                 pp.re1.label_type2 
##                        -0.10486987                         0.12729734 
## pp.re1.label_type3.re1.label_type2                 pp.re1.label_type3 
##                         0.03480369                         0.00000000 
##                   food.(Intercept)               food.re2.label_type1 
##                         0.26860976                         0.00000000 
##               food.re2.label_type2               food.re2.label_type3 
##                         0.06647395                         0.07468488
isSingular(h1_desire_model_no_food_corr, tol = 1e-5)
## [1] TRUE

I try removing the random correlations for pp, but that leads to more problems.

# construct model: drop the random-effect correlations for pp (double-bar ||
# syntax) while keeping the full covariance structure for food;
# afex::lmer_alt() expands the factor so || works with factor predictors
h1_desire_model_no_pp_corr <- lmer_alt(
  rating1 ~
    label_type +
    (1 + label_type || pp) +
    (1 + label_type | food),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

# inspect model: random/fixed effects and convergence messages
summary(h1_desire_model_no_pp_corr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 || pp) + (1 + re2.label_type1 + re2.label_type2 +  
##     re2.label_type3 | food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32479.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.88349 -0.73716 -0.00159  0.72018  2.70847 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr             
##  pp       (Intercept)     200.249  14.151                    
##  pp.1     re1.label_type1  23.505   4.848                    
##  pp.2     re1.label_type2  10.802   3.287                    
##  pp.3     re1.label_type3   0.000   0.000                    
##  food     (Intercept)      51.810   7.198                    
##           re2.label_type1   1.160   1.077    0.42            
##           re2.label_type2   4.856   2.204   -0.04  0.71      
##           re2.label_type3  10.052   3.171   -0.24 -0.97 -0.64
##  Residual                 723.779  26.903                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)  46.7830     1.9953 37.1913  23.446   <2e-16 ***
## label_type1   0.4584     0.9138 88.7823   0.502    0.617    
## label_type2  -1.4565     0.9722 24.9465  -1.498    0.147    
## label_type3  -0.2204     1.0684 17.4171  -0.206    0.839    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1  0.088              
## label_type2 -0.014 -0.145       
## label_type3 -0.130 -0.388 -0.421
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta entries at 0 indicate a boundary
# (singular) fit
getME(h1_desire_model_no_pp_corr,"theta")
##                       pp.(Intercept)                   pp.re1.label_type1 
##                         0.5259955059                         0.1802098813 
##                   pp.re1.label_type2                   pp.re1.label_type3 
##                         0.1221672029                         0.0000000000 
##                     food.(Intercept)     food.re2.label_type1.(Intercept) 
##                         0.2675480464                         0.0166371811 
##     food.re2.label_type2.(Intercept)     food.re2.label_type3.(Intercept) 
##                        -0.0028803224                        -0.0286267039 
##                 food.re2.label_type1 food.re2.label_type2.re2.label_type1 
##                         0.0364147398                         0.0650750422 
## food.re2.label_type3.re2.label_type1                 food.re2.label_type2 
##                        -0.1124376913                         0.0496599581 
## food.re2.label_type3.re2.label_type2                 food.re2.label_type3 
##                         0.0206427629                         0.0008847844
isSingular(h1_desire_model_no_pp_corr, tol = 1e-5)
## [1] TRUE

Next, I see whether changing the optimizer can help and whether estimates are stable across optimizers. Not all optimizers converge, which isn’t a good sign. Also, those that do converge show quite some discrepancies between their random effects.

# refit the maximal desire model with every optimizer afex::all_fit() knows
# about, to check whether convergence problems are optimizer-specific and
# whether estimates are stable across optimizers
h1_desire_model_all <- afex::all_fit(
  h1_desire_model,
  maxfun = 1e9
)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which ones converged: TRUE where the optimizer returned a merMod object
# (failed fits are try-error objects instead)
ok_fits <- sapply(h1_desire_model_all, is, "merMod")
ok_fits
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                         FALSE                         FALSE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                         FALSE
# was fit okay? TRUE where the call itself did not throw a try-error
# (a fit can run without error yet still fail to converge, see above)
!sapply(h1_desire_model_all, inherits, "try-error")
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                          TRUE                          TRUE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                          TRUE
# compare fixed effects across the optimizers that converged;
# t() gives one row per optimizer for easy side-by-side reading
ok_fits_details <- h1_desire_model_all[ok_fits]
t(sapply(ok_fits_details, "fixef"))
##                               (Intercept) label_type1 label_type2
## bobyqa.                          46.78268   0.4586223   -1.456177
## Nelder_Mead.                     46.78375   0.4591444   -1.455460
## nloptwrap.NLOPT_LN_NELDERMEAD    46.78260   0.4616065   -1.455916
## nloptwrap.NLOPT_LN_BOBYQA        46.78260   0.4616065   -1.455916
##                               label_type3
## bobyqa.                        -0.2203767
## Nelder_Mead.                   -0.2220568
## nloptwrap.NLOPT_LN_NELDERMEAD  -0.2230934
## nloptwrap.NLOPT_LN_BOBYQA      -0.2230934
# compare random effects (theta parameters) across optimizers;
# large discrepancies indicate an unstable / ill-identified model
t(sapply(ok_fits_details, getME, "theta"))
##                               pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa.                            0.5285398               -0.013668703
## Nelder_Mead.                       0.5456959               -0.008990211
## nloptwrap.NLOPT_LN_NELDERMEAD      0.5283294               -0.013809304
## nloptwrap.NLOPT_LN_BOBYQA          0.5283294               -0.013809304
##                               pp.label_type2.(Intercept)
## bobyqa.                                       0.03338064
## Nelder_Mead.                                  0.03540711
## nloptwrap.NLOPT_LN_NELDERMEAD                 0.03353303
## nloptwrap.NLOPT_LN_BOBYQA                     0.03353303
##                               pp.label_type3.(Intercept) pp.label_type1
## bobyqa.                                     -0.012696551      0.2445100
## Nelder_Mead.                                -0.008266452      0.2763200
## nloptwrap.NLOPT_LN_NELDERMEAD               -0.012960571      0.2457755
## nloptwrap.NLOPT_LN_BOBYQA                   -0.012960571      0.2457755
##                               pp.label_type2.label_type1
## bobyqa.                                      -0.03660130
## Nelder_Mead.                                 -0.01824275
## nloptwrap.NLOPT_LN_NELDERMEAD                -0.03730175
## nloptwrap.NLOPT_LN_BOBYQA                    -0.03730175
##                               pp.label_type3.label_type1 pp.label_type2
## bobyqa.                                       -0.1027990      0.1280992
## Nelder_Mead.                                  -0.1223092      0.1286608
## nloptwrap.NLOPT_LN_NELDERMEAD                 -0.1038638      0.1282024
## nloptwrap.NLOPT_LN_BOBYQA                     -0.1038638      0.1282024
##                               pp.label_type3.label_type2 pp.label_type3
## bobyqa.                                      0.034727208    0.000000000
## Nelder_Mead.                                 0.009490009    0.125855127
## nloptwrap.NLOPT_LN_NELDERMEAD                0.034636504    0.001211042
## nloptwrap.NLOPT_LN_BOBYQA                    0.034636504    0.001211042
##                               food.(Intercept)
## bobyqa.                              0.2691873
## Nelder_Mead.                         1.6786511
## nloptwrap.NLOPT_LN_NELDERMEAD        0.2686608
## nloptwrap.NLOPT_LN_BOBYQA            0.2686608
##                               food.label_type1.(Intercept)
## bobyqa.                                         0.01581504
## Nelder_Mead.                                   -0.14380861
## nloptwrap.NLOPT_LN_NELDERMEAD                   0.01572432
## nloptwrap.NLOPT_LN_BOBYQA                       0.01572432
##                               food.label_type2.(Intercept)
## bobyqa.                                       -0.003378140
## Nelder_Mead.                                   0.571312183
## nloptwrap.NLOPT_LN_NELDERMEAD                 -0.003346533
## nloptwrap.NLOPT_LN_BOBYQA                     -0.003346533
##                               food.label_type3.(Intercept)
## bobyqa.                                         -0.0274849
## Nelder_Mead.                                    -0.2895133
## nloptwrap.NLOPT_LN_NELDERMEAD                   -0.0271946
## nloptwrap.NLOPT_LN_BOBYQA                       -0.0271946
##                               food.label_type1
## bobyqa.                             0.03351366
## Nelder_Mead.                        0.04151679
## nloptwrap.NLOPT_LN_NELDERMEAD       0.00000000
## nloptwrap.NLOPT_LN_BOBYQA           0.00000000
##                               food.label_type2.label_type1
## bobyqa.                                         0.06723607
## Nelder_Mead.                                   -0.47075182
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.08996675
## nloptwrap.NLOPT_LN_BOBYQA                      -0.08996675
##                               food.label_type3.label_type1
## bobyqa.                                        -0.10983767
## Nelder_Mead.                                   -0.36538859
## nloptwrap.NLOPT_LN_NELDERMEAD                   0.05972506
## nloptwrap.NLOPT_LN_BOBYQA                       0.05972506
##                               food.label_type2
## bobyqa.                             0.04975216
## Nelder_Mead.                        0.79150054
## nloptwrap.NLOPT_LN_NELDERMEAD       0.01192588
## nloptwrap.NLOPT_LN_BOBYQA           0.01192588
##                               food.label_type3.label_type2
## bobyqa.                                         0.02194645
## Nelder_Mead.                                    0.46606050
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.06542542
## nloptwrap.NLOPT_LN_BOBYQA                      -0.06542542
##                               food.label_type3
## bobyqa.                             0.00000000
## Nelder_Mead.                        0.20880418
## nloptwrap.NLOPT_LN_NELDERMEAD       0.04180783
## nloptwrap.NLOPT_LN_BOBYQA           0.04180783

It might be time to further simplify the model to get it to converge. Before I begin removing slopes, I’ll first run the model without a random intercept for food and without correlations between random effects. Still there are convergence issues.

# construct model: remove the random intercept for food (0 + ...) and all
# random-effect correlations for both grouping factors (|| via lmer_alt)
h1_desire_model_no_intercept <- lmer_alt(
  rating1 ~
    label_type +
    (1 + label_type || pp) +
    (0 + label_type || food),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

summary(h1_desire_model_no_intercept)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 || pp) + (0 + re2.label_typecontextual +  
##     re2.label_typehealth_positive + re2.label_typeneutral + re2.label_typesensory ||  
##     food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32529.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.84602 -0.74294 -0.00683  0.71675  2.82184 
## 
## Random effects:
##  Groups   Name                          Variance Std.Dev.
##  pp       (Intercept)                   199.30   14.117  
##  pp.1     re1.label_type1                23.37    4.834  
##  pp.2     re1.label_type2                10.65    3.264  
##  pp.3     re1.label_type3                 0.00    0.000  
##  food     re2.label_typecontextual       59.14    7.690  
##  food.1   re2.label_typehealth_positive  53.24    7.297  
##  food.2   re2.label_typeneutral          51.60    7.183  
##  food.3   re2.label_typesensory          62.43    7.901  
##  Residual                               725.73   26.939  
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  46.7811     1.4469 190.4606  32.331   <2e-16 ***
## label_type1   0.4610     1.7217  33.0913   0.268    0.791    
## label_type2  -1.4550     1.6559  34.3539  -0.879    0.386    
## label_type3  -0.2148     1.6243  33.5805  -0.132    0.896    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1  0.013              
## label_type2 -0.018 -0.319       
## label_type3 -0.027 -0.318 -0.304
## convergence code: 0
## boundary (singular) fit: see ?isSingular
isSingular(h1_desire_model_no_intercept, tol = 1e-5)
## [1] TRUE

Because the random slopes for label_type within food seemed to lead to singular fit, I’ll remove that slope. However, that prevents us from generalizing to other foods (if there is an effect) and p-values for the effect of label_type become less conservative. The model doesn’t converge.

# construct model: drop the label_type slope within food entirely, keeping
# only a random intercept for food (plain lme4::lmer, correlations retained
# for pp)
h1_desire_model_no_slope <- lme4::lmer(
  rating1 ~
    label_type +
    (1 + label_type | pp) +
    (1 | food),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

summary(h1_desire_model_no_slope)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating1 ~ label_type + (1 + label_type | pp) + (1 | food)
##    Data: desire_plants
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32478.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.91258 -0.74349  0.00084  0.72470  2.62357 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  pp       (Intercept) 199.582  14.127                    
##           label_type1  42.776   6.540   -0.05            
##           label_type2  12.993   3.605    0.25 -0.26      
##           label_type3   9.012   3.002   -0.13 -0.93  0.51
##  food     (Intercept)  51.614   7.184                    
##  Residual             722.473  26.879                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  46.7815     1.9918  23.487
## label_type1   0.4642     0.9430   0.492
## label_type2  -1.4600     0.8450  -1.728
## label_type3  -0.2208     0.8311  -0.266
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.016              
## label_type2  0.044 -0.312       
## label_type3 -0.020 -0.408 -0.257
## convergence code: 0
## Model failed to converge with max|grad| = 0.0524254 (tol = 0.002, component 1)

Last, I’ll remove the entire food term. The model still runs into boundary problems.

# construct model: remove the food grouping factor altogether, leaving only
# by-participant random intercepts and label_type slopes
h1_desire_model_no_food <- lme4::lmer(
  rating1 ~
    label_type +
    (1 + label_type | pp),
  desire_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

summary(h1_desire_model_no_food)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating1 ~ label_type + (1 + label_type | pp)
##    Data: desire_plants
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 32651.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.83348 -0.77891 -0.01034  0.75289  2.64046 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  pp       (Intercept) 196.750  14.027                    
##           label_type1  41.009   6.404   -0.06            
##           label_type2   9.833   3.136    0.23 -0.24      
##           label_type3   5.443   2.333   -0.09 -0.94  0.48
##  Residual             777.391  27.882                    
## Number of obs: 3400, groups:  pp, 170
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  46.7815     1.1773  39.737
## label_type1   0.4750     0.9629   0.493
## label_type2  -1.4709     0.8624  -1.706
## label_type3  -0.2062     0.8473  -0.243
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.026              
## label_type2  0.060 -0.310       
## label_type3 -0.018 -0.382 -0.284
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta entries at 0 indicate a boundary
# (singular) fit
getME(h1_desire_model_no_food,"theta")
##             pp.(Intercept) pp.label_type1.(Intercept) 
##                0.503080456               -0.012663962 
## pp.label_type2.(Intercept) pp.label_type3.(Intercept) 
##                0.026279068               -0.007893868 
##             pp.label_type1 pp.label_type2.label_type1 
##                0.229329438               -0.026060945 
## pp.label_type3.label_type1             pp.label_type2 
##               -0.079397936                0.106201182 
## pp.label_type3.label_type2             pp.label_type3 
##                0.025205164                0.000000000
isSingular(h1_desire_model_no_food, tol = 1e-5)
## [1] TRUE

The next step would be to remove the slope for pp, but that removal would increase our Type I error chance and lead to dramatically wrong p-values. Instead, I’ll run a repeated-measures ANOVA.

# repeated-measures ANOVA on desire ratings with participant as the unit;
# afex::aov_ez aggregates the multiple observations per pp x label_type cell
# (by default the mean) before fitting
desire_anova <- 
  aov_ez(
    id = "pp", 
    dv = "rating1", 
    data = desire_plants, 
    within = "label_type"
  )

# Type 3 ANOVA table; non-integer dfs in the output indicate a
# sphericity-corrected test
anova(desire_anova)
## Anova Table (Type 3 tests)
## 
## Response: rating1
##            num Df den Df    MSE      F       ges Pr(>F)
## label_type 2.8937 489.04 181.68 1.2535 0.0026495 0.2898

The diagnostics look okay.

# get the residuals and add the pp identifier
# (desire_anova$lm is the underlying lm fit; its residuals come with one
# column per label_type level, as used in the density plots below)
anova_residuals <- as_tibble(desire_anova$lm$residuals) %>%
  mutate(pp = row_number()) %>%
  select(pp, everything())

# inspect residuals: density plots of the standardized residuals, one per
# label_type condition, to eyeball approximate normality
densityplot(scale(anova_residuals$contextual, scale = TRUE))

densityplot(scale(anova_residuals$health_positive, scale = TRUE))

densityplot(scale(anova_residuals$neutral, scale = TRUE))

densityplot(scale(anova_residuals$sensory, scale = TRUE))

4.1.2 Exploratory

The main effect isn’t significant, but I’ll inspect the post-hoc tests out of curiosity: all comparisons are nonsignificant.

# post-hoc pairwise comparisons of the label_type estimated marginal means
# (Tukey-adjusted p-values, see output below)
desire_anova_posthocs <- 
  emmeans(
    desire_anova,
    pairwise ~ label_type
  )

desire_anova_posthocs
## $emmeans
##  label_type      emmean   SE  df lower.CL upper.CL
##  contextual        47.3 1.47 372     44.4     50.1
##  health_positive   45.3 1.47 372     42.4     48.2
##  neutral           46.6 1.47 372     43.7     49.5
##  sensory           48.0 1.47 372     45.1     50.9
## 
## Warning: EMMs are biased unless design is perfectly balanced 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                     estimate   SE  df t.ratio p.value
##  contextual - health_positive    1.946 1.44 507  1.355  0.5282 
##  contextual - neutral            0.681 1.44 507  0.474  0.9647 
##  contextual - sensory           -0.727 1.44 507 -0.506  0.9576 
##  health_positive - neutral      -1.265 1.44 507 -0.881  0.8148 
##  health_positive - sensory      -2.673 1.44 507 -1.862  0.2461 
##  neutral - sensory              -1.408 1.44 507 -0.981  0.7605 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

To quantify evidence for the null, I’ll also run a Bayesian ANOVA. There’s strong evidence for the null. Usually I’d also conduct robustness checks, but there were no outliers in the model diagnostics.

# Bayesian ANOVA with pp declared as a random factor;
# anovaBF requires a plain data.frame, hence the as.data.frame() wrapper
desire_anova_bayes <- 
  anovaBF(
   rating1 ~ label_type + pp,
   data = as.data.frame(desire %>% filter(food_type == "plant_based")), whichRandom = "pp"
  )

# invert the Bayes factor so it expresses evidence FOR the null (pp-only)
# model over the label_type model
1/desire_anova_bayes
## Bayes factor analysis
## --------------
## [1] pp : 263.0408 ±0.69%
## 
## Against denominator:
##   rating1 ~ label_type + pp 
## ---
## Bayes factor type: BFlinearModel, JZS

Last, I see whether the results are robust to the exclusion of the two alleged vegetarians: the results don’t change.

# robustness check: refit the repeated-measures ANOVA excluding the two
# alleged vegetarians (pp_77, pp_87)
desire_anova_no_vegetarians <-
  aov_ez(
    id = "pp",
    dv = "rating1",
    data = desire_plants %>%
      filter(!pp %in% c("pp_77", "pp_87")),
    within = "label_type"
  )

# BUG FIX: the original line printed anova(desire_anova), i.e. the
# full-sample ANOVA again, so the robustness result was never actually
# displayed. Print the exclusion model instead (the rendered table below is
# stale until the document is re-knitted).
anova(desire_anova_no_vegetarians)
## Anova Table (Type 3 tests)
## 
## Response: rating1
##            num Df den Df    MSE      F       ges Pr(>F)
## label_type 2.8937 489.04 181.68 1.2535 0.0026495 0.2898

4.2 Main effect of labels on simulations

4.2.1 Confirmatory

I’ll fit the same initial model as above, but this time predicting simulations. Once more, the model runs into a singularity problem. One theta is estimated to be zero.

# get plants-only data set
simulations_plants <- 
  simulations %>% 
  filter(food_type == "plant_based") %>% 
  mutate(food_type = droplevels(food_type))

# set contrasts
options(contrasts = c("contr.sum", "contr.poly"))

# construct maximal model: random intercepts and label_type slopes (with
# correlations) for both pp and food, predicting simulations (rating3)
h1_simulations_model <- lme4::lmer(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type | food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

# inspect model: boundary (singular) fit again, see output below
summary(h1_simulations_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating3 ~ label_type + (1 + label_type | pp) + (1 + label_type |  
##     food)
##    Data: simulations_plants
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 30911.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3616 -0.5559  0.0394  0.5896  3.3425 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  pp       (Intercept) 229.922  15.163                    
##           label_type1   9.542   3.089   -0.25            
##           label_type2  10.764   3.281    0.06 -0.59      
##           label_type3   8.220   2.867   -0.09 -0.45  0.98
##  food     (Intercept)   7.240   2.691                    
##           label_type1   2.646   1.627   -0.19            
##           label_type2   3.008   1.734   -0.16 -0.20      
##           label_type3   5.546   2.355    0.26 -0.50 -0.75
##  Residual             444.827  21.091                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  56.2461     1.3584  41.405
## label_type1   1.7044     0.7623   2.236
## label_type2  -1.6534     0.7787  -2.123
## label_type3  -0.2419     0.8475  -0.285
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.106              
## label_type2 -0.019 -0.327       
## label_type3  0.052 -0.387 -0.348
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta entries at 0 indicate a boundary
# (singular) fit
getME(h1_simulations_model,"theta")
##               pp.(Intercept)   pp.label_type1.(Intercept) 
##                  0.718943077                 -0.036490422 
##   pp.label_type2.(Intercept)   pp.label_type3.(Intercept) 
##                  0.009599525                 -0.011734441 
##               pp.label_type1   pp.label_type2.label_type1 
##                  0.141846319                 -0.092648512 
##   pp.label_type3.label_type1               pp.label_type2 
##                 -0.065769747                  0.124589515 
##   pp.label_type3.label_type2               pp.label_type3 
##                  0.118385804                  0.000523398 
##             food.(Intercept) food.label_type1.(Intercept) 
##                  0.127577211                 -0.014620294 
## food.label_type2.(Intercept) food.label_type3.(Intercept) 
##                 -0.013531451                  0.029051309 
##             food.label_type1 food.label_type2.label_type1 
##                  0.075732595                 -0.019053078 
## food.label_type3.label_type1             food.label_type2 
##                 -0.051301781                  0.078847045 
## food.label_type3.label_type2             food.label_type3 
##                 -0.094822253                  0.000000000
isSingular(h1_simulations_model, tol = 1e-5)
## [1] TRUE

Again I remove random correlations within the food factor to see whether that helps with singularity. It doesn’t: Now the model cannot estimate a variance around the slope for label_type2 within food.

# construct model: drop the random-effect correlations within food (|| via
# afex::lmer_alt) to see whether that resolves the singularity.
# BUG FIX: the original formula used rating1 (desire) as the DV, but this
# section models simulations; corrected to rating3 for consistency with
# h1_simulations_model above. (The rendered output below — note its REML
# criterion of 31599 vs 30911.7 for the maximal simulations model — was
# produced with the wrong DV and is stale until re-knitted.)
h1_simulations_model_no_food_corr <- lmer_alt(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 + label_type || food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

# inspect model: random/fixed effects and convergence messages
summary(h1_simulations_model_no_food_corr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 | pp) + (1 + re2.label_type1 + re2.label_type2 +  
##     re2.label_type3 || food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 31599
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6647 -0.5303  0.1005  0.5904  3.4209 
## 
## Random effects:
##  Groups   Name            Variance  Std.Dev. Corr             
##  pp       (Intercept)     2.931e+02 17.12025                  
##           re1.label_type1 8.986e+00  2.99760 -0.22            
##           re1.label_type2 8.127e+00  2.85082  0.03 -0.79      
##           re1.label_type3 6.253e+00  2.50053  0.12 -0.23  0.74
##  food     (Intercept)     9.325e+00  3.05363                  
##  food.1   re2.label_type1 1.197e-04  0.01094                  
##  food.2   re2.label_type2 0.000e+00  0.00000                  
##  food.3   re2.label_type3 9.838e-01  0.99188                  
##  Residual                 5.508e+02 23.46945                  
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  60.1741     1.5337 131.8907  39.233   <2e-16 ***
## label_type1   1.5388     0.7342 195.8936   2.096   0.0374 *  
## label_type2  -1.7836     0.7307 281.4112  -2.441   0.0153 *  
## label_type3  -0.3616     0.7564  39.3394  -0.478   0.6352    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.060              
## label_type2  0.006 -0.377       
## label_type3  0.027 -0.310 -0.237
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta entries at 0 indicate a boundary
# (singular) fit
getME(h1_simulations_model_no_food_corr,"theta")
##                     pp.(Intercept)     pp.re1.label_type1.(Intercept) 
##                       0.7294695247                      -0.0286051351 
##     pp.re1.label_type2.(Intercept)     pp.re1.label_type3.(Intercept) 
##                       0.0030674591                       0.0133041933 
##                 pp.re1.label_type1 pp.re1.label_type2.re1.label_type1 
##                       0.1244791403                      -0.0983248187 
## pp.re1.label_type3.re1.label_type1                 pp.re1.label_type2 
##                      -0.0218295094                       0.0712577828 
## pp.re1.label_type3.re1.label_type2                 pp.re1.label_type3 
##                       0.1034256704                       0.0010983621 
##                   food.(Intercept)               food.re2.label_type1 
##                       0.1301107671                       0.0004662237 
##               food.re2.label_type2               food.re2.label_type3 
##                       0.0000000000                       0.0422625381
isSingular(h1_simulations_model_no_food_corr, tol = 1e-5)
## [1] TRUE

Removing the correlations for random terms within pp makes the problem worse.

# construct model: drop the random-effect correlations within pp instead
# (|| via afex::lmer_alt), keeping the full covariance structure for food.
# BUG FIX: the original formula used rating1 (desire) as the DV, but this
# section models simulations; corrected to rating3 for consistency with
# h1_simulations_model above. (The rendered output below was produced with
# the wrong DV and is stale until re-knitted.)
h1_simulations_model_no_pp_corr <- lmer_alt(
  rating3 ~
    label_type +
    (1 + label_type || pp) +
    (1 + label_type | food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)  # generous iteration limit for the optimizer
  )
)

# inspect model: note the convergence warning in the output below
summary(h1_simulations_model_no_pp_corr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 || pp) + (1 + re2.label_type1 + re2.label_type2 +  
##     re2.label_type3 | food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 31596.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6275 -0.5364  0.1010  0.5954  3.4281 
## 
## Random effects:
##  Groups   Name            Variance Std.Dev. Corr             
##  pp       (Intercept)     292.811  17.112                    
##  pp.1     re1.label_type1   2.404   1.551                    
##  pp.2     re1.label_type2   2.252   1.501                    
##  pp.3     re1.label_type3   7.651   2.766                    
##  food     (Intercept)       9.369   3.061                    
##           re2.label_type1   3.902   1.975   -0.68            
##           re2.label_type2   3.824   1.956   -0.16 -0.48      
##           re2.label_type3   5.006   2.238    0.58 -0.28 -0.68
##  Residual                 552.130  23.497                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  60.1719     1.5340 131.7893  39.224   <2e-16 ***
## label_type1   1.5341     0.8346  18.4658   1.838   0.0822 .  
## label_type2  -1.7836     0.8317  20.4197  -2.144   0.0442 *  
## label_type3  -0.3595     0.8847  19.9729  -0.406   0.6888    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.161              
## label_type2 -0.037 -0.368       
## label_type3  0.147 -0.303 -0.423
## convergence code: 0
## Model failed to converge with max|grad| = 0.0183668 (tol = 0.002, component 1)

Next, I see whether changing the optimizer can help and whether estimates are stable across optimizers. Once again, not all optimizers converge, which isn’t a good sign. Also, those that do converge show quite some discrepancies between their random effects.

# refit the maximal simulations model with every optimizer afex::all_fit()
# knows about, to check convergence and stability across optimizers
h1_simulations_model_all <- afex::all_fit(
  h1_simulations_model,
  maxfun = 1e9
)
## bobyqa. : [OK]
## Nelder_Mead. : [OK]
## optimx.nlminb : [ERROR]
## optimx.L-BFGS-B : [ERROR]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
## nmkbw. : [ERROR]
# which ones converged: TRUE where the optimizer returned a merMod object
ok_fits2 <- sapply(h1_simulations_model_all, is, "merMod")
ok_fits2
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                         FALSE                         FALSE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                         FALSE
# was fit okay? TRUE where the call itself did not throw a try-error
!sapply(h1_simulations_model_all, inherits, "try-error")
##                       bobyqa.                  Nelder_Mead. 
##                          TRUE                          TRUE 
##                 optimx.nlminb               optimx.L-BFGS-B 
##                          TRUE                          TRUE 
## nloptwrap.NLOPT_LN_NELDERMEAD     nloptwrap.NLOPT_LN_BOBYQA 
##                          TRUE                          TRUE 
##                        nmkbw. 
##                          TRUE
# compare fixed effects across the optimizers that converged:
# one row per optimizer, one column per fixed-effect coefficient
ok_fits_details2 <- h1_simulations_model_all[ok_fits2]
t(sapply(ok_fits_details2, "fixef"))
##                               (Intercept) label_type1 label_type2
## bobyqa.                          56.24607    1.704406   -1.653449
## Nelder_Mead.                     56.25855    1.694912   -1.652943
## nloptwrap.NLOPT_LN_NELDERMEAD    56.24607    1.704418   -1.653449
## nloptwrap.NLOPT_LN_BOBYQA        56.24607    1.704418   -1.653449
##                               label_type3
## bobyqa.                        -0.2418600
## Nelder_Mead.                   -0.2420791
## nloptwrap.NLOPT_LN_NELDERMEAD  -0.2418685
## nloptwrap.NLOPT_LN_BOBYQA      -0.2418685
# compare random effects across converged optimizers; "theta" holds the
# relative Cholesky factors of the random-effects covariance matrices
t(sapply(ok_fits_details2, getME, "theta"))
##                               pp.(Intercept) pp.label_type1.(Intercept)
## bobyqa.                            0.7190269                -0.03637453
## Nelder_Mead.                       0.7218706                -0.04705320
## nloptwrap.NLOPT_LN_NELDERMEAD      0.7189431                -0.03649042
## nloptwrap.NLOPT_LN_BOBYQA          0.7189431                -0.03649042
##                               pp.label_type2.(Intercept)
## bobyqa.                                      0.009536035
## Nelder_Mead.                                 0.012047748
## nloptwrap.NLOPT_LN_NELDERMEAD                0.009599525
## nloptwrap.NLOPT_LN_BOBYQA                    0.009599525
##                               pp.label_type3.(Intercept) pp.label_type1
## bobyqa.                                     -0.011887533     0.14141465
## Nelder_Mead.                                -0.004018013     0.02503186
## nloptwrap.NLOPT_LN_NELDERMEAD               -0.011734441     0.14184632
## nloptwrap.NLOPT_LN_BOBYQA                   -0.011734441     0.14184632
##                               pp.label_type2.label_type1
## bobyqa.                                      -0.09251459
## Nelder_Mead.                                 -0.09735528
## nloptwrap.NLOPT_LN_NELDERMEAD                -0.09264851
## nloptwrap.NLOPT_LN_BOBYQA                    -0.09264851
##                               pp.label_type3.label_type1 pp.label_type2
## bobyqa.                                      -0.06558412     0.12454813
## Nelder_Mead.                                 -0.14962948     0.05491353
## nloptwrap.NLOPT_LN_NELDERMEAD                -0.06576975     0.12458951
## nloptwrap.NLOPT_LN_BOBYQA                    -0.06576975     0.12458951
##                               pp.label_type3.label_type2 pp.label_type3
## bobyqa.                                       0.11845219    0.000000000
## Nelder_Mead.                                  0.07609333    0.005343884
## nloptwrap.NLOPT_LN_NELDERMEAD                 0.11838580    0.000523398
## nloptwrap.NLOPT_LN_BOBYQA                     0.11838580    0.000523398
##                               food.(Intercept)
## bobyqa.                              0.1275270
## Nelder_Mead.                         0.1583579
## nloptwrap.NLOPT_LN_NELDERMEAD        0.1275772
## nloptwrap.NLOPT_LN_BOBYQA            0.1275772
##                               food.label_type1.(Intercept)
## bobyqa.                                        -0.01492922
## Nelder_Mead.                                   -0.03400713
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.01462029
## nloptwrap.NLOPT_LN_BOBYQA                      -0.01462029
##                               food.label_type2.(Intercept)
## bobyqa.                                        -0.01355212
## Nelder_Mead.                                    0.27487541
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.01353145
## nloptwrap.NLOPT_LN_BOBYQA                      -0.01353145
##                               food.label_type3.(Intercept)
## bobyqa.                                         0.02931423
## Nelder_Mead.                                   -0.31596968
## nloptwrap.NLOPT_LN_NELDERMEAD                   0.02905131
## nloptwrap.NLOPT_LN_BOBYQA                       0.02905131
##                               food.label_type1
## bobyqa.                             0.07584090
## Nelder_Mead.                        0.09245925
## nloptwrap.NLOPT_LN_NELDERMEAD       0.07573259
## nloptwrap.NLOPT_LN_BOBYQA           0.07573259
##                               food.label_type2.label_type1
## bobyqa.                                        -0.01904021
## Nelder_Mead.                                   -0.30937809
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.01905308
## nloptwrap.NLOPT_LN_BOBYQA                      -0.01905308
##                               food.label_type3.label_type1
## bobyqa.                                        -0.05136257
## Nelder_Mead.                                    0.00569828
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.05130178
## nloptwrap.NLOPT_LN_BOBYQA                      -0.05130178
##                               food.label_type2
## bobyqa.                             0.07881494
## Nelder_Mead.                        2.09739189
## nloptwrap.NLOPT_LN_NELDERMEAD       0.07884705
## nloptwrap.NLOPT_LN_BOBYQA           0.07884705
##                               food.label_type3.label_type2
## bobyqa.                                        -0.09490507
## Nelder_Mead.                                    0.14972981
## nloptwrap.NLOPT_LN_NELDERMEAD                  -0.09482225
## nloptwrap.NLOPT_LN_BOBYQA                      -0.09482225
##                               food.label_type3
## bobyqa.                               0.000000
## Nelder_Mead.                          2.643437
## nloptwrap.NLOPT_LN_NELDERMEAD         0.000000
## nloptwrap.NLOPT_LN_BOBYQA             0.000000

It might be time to further simplify the model to get it to converge. Before I begin removing slopes, I’ll first run the model without a random intercept for food and without correlations between random effects. The model still doesn’t converge.

# construct model: afex::lmer_alt (rather than lme4::lmer) expands the
# double-bar (||) syntax for factor predictors, dropping correlations
# between random effects; the (0 + ...) term also removes the random
# intercept for food
h1_simulations_model_no_intercept <- lmer_alt(
  rating3 ~
    label_type +
    (1 + label_type || pp) +
    (0 + label_type || food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  )
)

# inspect estimates and convergence messages
summary(h1_simulations_model_no_intercept)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating3 ~ label_type + (1 + re1.label_type1 + re1.label_type2 +  
##     re1.label_type3 || pp) + (0 + re2.label_typecontextual +  
##     re2.label_typehealth_positive + re2.label_typeneutral + re2.label_typesensory ||  
##     food)
##    Data: data
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 30930.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5193 -0.5412  0.0389  0.5917  3.3844 
## 
## Random effects:
##  Groups   Name                          Variance Std.Dev.
##  pp       (Intercept)                   229.991  15.165  
##  pp.1     re1.label_type1                 3.899   1.975  
##  pp.2     re1.label_type2                10.783   3.284  
##  pp.3     re1.label_type3                 8.745   2.957  
##  food     re2.label_typecontextual        8.391   2.897  
##  food.1   re2.label_typehealth_positive   8.398   2.898  
##  food.2   re2.label_typeneutral          15.945   3.993  
##  food.3   re2.label_typesensory           4.583   2.141  
##  Residual                               447.921  21.164  
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  56.2474     1.2654 183.4737  44.449   <2e-16 ***
## label_type1   1.6997     0.8629  32.2486   1.970   0.0575 .  
## label_type2  -1.6511     0.8862  32.4348  -1.863   0.0715 .  
## label_type3  -0.2418     0.9808  29.8167  -0.247   0.8069    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.011              
## label_type2 -0.010 -0.294       
## label_type3  0.067 -0.377 -0.368
## convergence code: 0
## Model failed to converge with max|grad| = 0.00373721 (tol = 0.002, component 1)

Because the random slopes for label_type within food seemed to lead to singular fit, I’ll remove that slope. Again, that prevents us from generalizing to other foods (if there is an effect) and p-values for the effect of label_type become less conservative. The model still throws a warning, but the singularity is within acceptable tolerance.

# construct model: drop the by-food random slope for label_type, keeping
# the full by-participant slope structure and a random intercept for food
h1_simulations_model_no_slope <- lme4::lmer(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 | food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  )
)

# inspect estimates and convergence messages
summary(h1_simulations_model_no_slope)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating3 ~ label_type + (1 + label_type | pp) + (1 | food)
##    Data: simulations_plants
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 30916.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3375 -0.5489  0.0444  0.5864  3.3789 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  pp       (Intercept) 229.612  15.153                    
##           label_type1  10.297   3.209   -0.24            
##           label_type2  11.842   3.441    0.06 -0.63      
##           label_type3   8.121   2.850   -0.09 -0.47  0.98
##  food     (Intercept)   7.211   2.685                    
##  Residual             447.309  21.150                    
## Number of obs: 3400, groups:  pp, 170; food, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  56.2462     1.3575  41.434
## label_type1   1.7063     0.6748   2.529
## label_type2  -1.6546     0.6815  -2.428
## label_type3  -0.2424     0.6652  -0.364
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2
## label_type1 -0.074              
## label_type2  0.018 -0.376       
## label_type3 -0.026 -0.350 -0.166
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta values at or near zero indicate which
# variance component drives the singular-fit warning
getME(h1_simulations_model_no_slope,"theta")
##             pp.(Intercept) pp.label_type1.(Intercept) 
##               7.164627e-01              -3.589059e-02 
## pp.label_type2.(Intercept) pp.label_type3.(Intercept) 
##               9.054683e-03              -1.236489e-02 
##             pp.label_type1 pp.label_type2.label_type1 
##               1.474154e-01              -1.039909e-01 
## pp.label_type3.label_type1             pp.label_type2 
##              -6.827576e-02               1.248150e-01 
## pp.label_type3.label_type2             pp.label_type3 
##               1.155036e-01               5.423558e-05 
##           food.(Intercept) 
##               1.269722e-01
isSingular(h1_simulations_model_no_slope, tol = 1e-5)
## [1] FALSE

The model diagnostics look fine overall. As expected, when we plot fitted values against residuals there are clear cut-offs, because the scale is bounded at 0 and 100, such that a fitted value of 50 can only have a residual of plus or minus 50. Looking at potential outliers, participants pp_158 and pp_130 stick out slightly for Cook’s distance. As for foods, tofu_stir_fty and cauliflower_tacos also stand out. In the exploratory section, I’ll run a robustness check and see whether the results change if we exclude these participants and foods.

# inspect standardized residuals: density should look approximately normal
densityplot(resid(h1_simulations_model_no_slope, scaled = TRUE)) 

# q-q plot of standardized residuals (also labels the two most extreme cases)
car::qqPlot(resid(h1_simulations_model_no_slope, scaled = TRUE))

## [1] 2423  322
# proportion of residuals in the extremes of the distribution
# (lmer_residuals is a custom helper, presumably defined earlier in this file)
lmer_residuals(h1_simulations_model_no_slope)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05      0.0503  FALSE       
## 2                 2.5            0.01      0.0162  FALSE       
## 3                 3              0.0001    0.00294 FALSE
# obtain outliers: case-deletion influence measures, grouped by
# participant and by food (this refits the model once per group level)
h1_simulations_model_no_slope_outliers_pp <- influence.ME::influence(h1_simulations_model_no_slope, c("pp"))
h1_simulations_model_no_slope_outliers_food <- influence.ME::influence(h1_simulations_model_no_slope, c("food"))

# plot formal outliers for pp: Cook's distance, then dfbetas per coefficient
plot(h1_simulations_model_no_slope_outliers_pp, which = 'cook')

plot(h1_simulations_model_no_slope_outliers_pp, which = 'dfbetas')[1]

plot(h1_simulations_model_no_slope_outliers_pp, which = 'dfbetas')[2]

plot(h1_simulations_model_no_slope_outliers_pp, which = 'dfbetas')[3]

plot(h1_simulations_model_no_slope_outliers_pp, which = 'dfbetas')[4]

# which ones are the highest
# (arrange_cook is a custom helper, presumably defined earlier in this file)
arrange_cook(h1_simulations_model_no_slope_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_158         0.0366
## 2 pp_130         0.0338
## 3 pp_125         0.0229
## 4 pp_28          0.0219
## 5 pp_106         0.0208
## 6 pp_5           0.0204
# plot formal outliers for food: Cook's distance, then dfbetas per coefficient
plot(h1_simulations_model_no_slope_outliers_food, which = 'cook')

plot(h1_simulations_model_no_slope_outliers_food, which = 'dfbetas')[1]

plot(h1_simulations_model_no_slope_outliers_food, which = 'dfbetas')[2]

plot(h1_simulations_model_no_slope_outliers_food, which = 'dfbetas')[3]

plot(h1_simulations_model_no_slope_outliers_food, which = 'dfbetas')[4]

# which ones are so far out on cook's distance?
arrange_cook(h1_simulations_model_no_slope_outliers_food, "food")
## # A tibble: 6 x 2
##   food              cooks_distance
##   <chr>                      <dbl>
## 1 tofu_stir_fty             0.142 
## 2 cauliflower_tacos         0.132 
## 3 bean_burger               0.0958
## 4 bean_chilli               0.0939
## 5 tomato_soup               0.0838
## 6 lentil_soup               0.0694
# plot fitted values vs. residuals (homo-/heteroskedasticity check)
# with an added smoothed trend line
plot(h1_simulations_model_no_slope, type = c('p', 'smooth'))

Next, I obtain p-values. We get the same warning as above. The effect is marginally significant, but not at our adjusted alpha level.

# get p-values via afex::mixed: type III tests with Satterthwaite
# degrees of freedom (method = "S")
h1_simulations_model_no_slope_p <- mixed(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 | food),
  simulations_plants,
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  ),
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_simulations_model_no_slope_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating3 ~ label_type + (1 + label_type | pp) + (1 | food)
## Data: simulations_plants
##            num Df den Df      F  Pr(>F)  
## label_type      3  242.1 2.9788 0.03217 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# get approximate effect size: R2m = marginal (fixed effects only),
# R2c = conditional (fixed + random effects) pseudo-R^2
r.squaredGLMM(h1_simulations_model_no_slope)
##              R2m       R2c
## [1,] 0.002052632 0.3608351

4.2.2 Exploratory

Just out of curiosity, I’ll run pairwise comparisons to see where groups differ: the only significant difference is between contextual and health positive labels. But again, this effect isn’t significant at our adjusted alpha level.

# Tukey-corrected pairwise contrasts between the four label types,
# computed from the final model without by-food slopes
h1_simulations_model_posthocs <- emmeans(
  h1_simulations_model_no_slope, pairwise ~ label_type
)

# print estimated marginal means and contrasts
h1_simulations_model_posthocs
## $emmeans
##  label_type      emmean   SE  df asymp.LCL asymp.UCL
##  contextual        58.0 1.47 Inf      55.1      60.8
##  health_positive   54.6 1.53 Inf      51.6      57.6
##  neutral           56.0 1.50 Inf      53.1      58.9
##  sensory           56.4 1.59 Inf      53.3      59.6
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                     estimate   SE  df z.ratio p.value
##  contextual - health_positive    3.361 1.12 Inf  2.988  0.0149 
##  contextual - neutral            1.949 1.10 Inf  1.770  0.2877 
##  contextual - sensory            1.516 1.12 Inf  1.351  0.5304 
##  health_positive - neutral      -1.412 1.03 Inf -1.373  0.5162 
##  health_positive - sensory      -1.845 1.20 Inf -1.533  0.4179 
##  neutral - sensory              -0.433 1.19 Inf -0.364  0.9835 
## 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates

Next, I check whether the results change if we exclude the model outliers, excluding pp_158, pp_130, tofu_stir_fty, and cauliflower_tacos. The effect becomes less significant, which a) probably indicates that the effect is not robust, but b) could also merely reflect reduced power after excluding quite a number of cases. Overall, I believe the effect is not robust.

# get p-value for a robustness check: refit without the influential
# participants and foods identified via Cook's distance above
h1_simulations_model_no_slope_no_outliers_p <- mixed(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 | food),
  simulations_plants %>% 
    filter(!pp %in% c("pp_158", "pp_130")) %>% 
    filter(!food %in% c("tofu_stir_fty", "cauliflower_tacos")),
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  ),
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_simulations_model_no_slope_no_outliers_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating3 ~ label_type + (1 + label_type | pp) + (1 | food)
## Data: %>%
## Data: simulations_plants %>% filter(!pp %in% c("pp_158", "pp_130"))
## Data: filter(!food %in% c("tofu_stir_fty", "cauliflower_tacos"))
##            num Df den Df      F  Pr(>F)  
## label_type      3 287.47 2.6027 0.05223 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Last, excluding the two alleged vegetarians doesn’t change the results either: the p-value gets a bit lower, but still above our adjusted cut-off.

# get p-value for a second robustness check: refit without the two
# participants suspected to be vegetarians
h1_simulations_model_no_slope_no_vegetarians_p <- mixed(
  rating3 ~
    label_type +
    (1 + label_type | pp) +
    (1 | food),
  simulations_plants %>% 
    filter(!pp %in% c("pp_77", "pp_87")),
  control = lmerControl(
    optCtrl=list(maxfun=1e9)
  ),
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h1_simulations_model_no_slope_no_vegetarians_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating3 ~ label_type + (1 + label_type | pp) + (1 | food)
## Data: %>%
## Data: simulations_plants
## Data: filter(!pp %in% c("pp_77", "pp_87"))
##            num Df den Df      F  Pr(>F)  
## label_type      3 238.58 3.0142 0.03073 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Interaction of food type and meat eating frequency

4.3.1 Confirmatory

Next, I see whether there’s a main effect of food_type across label conditions and whether this effect is moderated by meat_per_week. meat_per_week is a trait-level variable and therefore doesn’t need a random slope. Otherwise, we keep the same original random effects structure as above. I also standardize meat_per_week to help with interpretation and convergence.

The model converges without warnings.

# standardize meat eating frequency (z-score) to aid interpretation
# and convergence
# NOTE(review): scale() returns a one-column matrix; lmer handles this,
# but wrapping in as.numeric() would keep the column a plain vector
desire <- 
  desire %>% 
  mutate(
    meat_per_week_s = scale(meat_per_week, scale = TRUE)
  )

# set contrasts: sum-to-zero coding so main effects are interpretable
# in the presence of the interaction
options(contrasts = c("contr.sum", "contr.poly")) 

# fit model: food_type x meat frequency interaction; meat_per_week_s is
# trait-level, so only food_type gets a by-participant random slope
h4_food_meat_model <- 
  lme4::lmer(
    rating1 ~ 
      food_type*meat_per_week_s +
      (1 + food_type | pp) +
      (1 | food),
    desire,
    control = lmerControl(
      optCtrl=list(maxfun=1e9)
    )
  )

# inspect estimates and convergence messages
summary(h4_food_meat_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating1 ~ food_type * meat_per_week_s + (1 + food_type | pp) +  
##     (1 | food)
##    Data: desire
## Control: lmerControl(optCtrl = list(maxfun = 1e+09))
## 
## REML criterion at convergence: 64774.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1977 -0.7491  0.0868  0.7369  2.8283 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  pp       (Intercept) 115.43   10.744        
##           food_type1   40.33    6.350   -0.19
##  food     (Intercept)  68.62    8.284        
##  Residual             775.71   27.852        
## Number of obs: 6760, groups:  pp, 169; food, 40
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                 52.3148     1.5853  32.999
## food_type1                   5.5476     1.4384   3.857
## meat_per_week_s             -0.4371     0.8932  -0.489
## food_type1:meat_per_week_s   3.6887     0.5945   6.205
## 
## Correlation of Fixed Effects:
##             (Intr) fd_ty1 mt_p__
## food_type1  -0.034              
## met_pr_wk_s  0.000  0.000       
## fd_typ1:___  0.000  0.000 -0.148

The model diagnostics look okay, but there is a clear outlier for participants: pp_87. For foods, there’s no clear outliers. I’ll test the robustness of the model without the one participant in the exploratory section.

# inspect standardized residuals: density should look approximately normal
densityplot(resid(h4_food_meat_model, scaled = TRUE)) 

# q-q plot of standardized residuals (also labels the two most extreme cases)
car::qqPlot(resid(h4_food_meat_model, scaled = TRUE))

## 6799 1245 
## 6759 1245
# proportion of residuals in the extremes of the distribution
# (lmer_residuals is a custom helper, presumably defined earlier in this file)
lmer_residuals(h4_food_meat_model)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05     0.0275   TRUE        
## 2                 2.5            0.01     0.00459  TRUE        
## 3                 3              0.0001   0.000148 FALSE
# obtain outliers: case-deletion influence measures, grouped by
# participant and by food (this refits the model once per group level)
h4_food_meat_model_outliers_pp <- influence.ME::influence(h4_food_meat_model, c("pp"))
h4_food_meat_model_outliers_food <- influence.ME::influence(h4_food_meat_model, c("food"))

# plot formal outliers for pp: Cook's distance, then dfbetas per coefficient
plot(h4_food_meat_model_outliers_pp, which = 'cook')

plot(h4_food_meat_model_outliers_pp, which = 'dfbetas')[1]

plot(h4_food_meat_model_outliers_pp, which = 'dfbetas')[2]

plot(h4_food_meat_model_outliers_pp, which = 'dfbetas')[3]

plot(h4_food_meat_model_outliers_pp, which = 'dfbetas')[4]

# which ones are the highest
arrange_cook(h4_food_meat_model_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_87          0.0913
## 2 pp_20          0.0362
## 3 pp_172         0.0325
## 4 pp_144         0.0312
## 5 pp_36          0.0266
## 6 pp_62          0.0239
# plot formal outliers for food: Cook's distance, then dfbetas per coefficient
plot(h4_food_meat_model_outliers_food, which = 'cook')

plot(h4_food_meat_model_outliers_food, which = 'dfbetas')[1]

plot(h4_food_meat_model_outliers_food, which = 'dfbetas')[2]

plot(h4_food_meat_model_outliers_food, which = 'dfbetas')[3]

plot(h4_food_meat_model_outliers_food, which = 'dfbetas')[4]

# which ones are so far out on cook's distance?
arrange_cook(h4_food_meat_model_outliers_food, "food")
## # A tibble: 6 x 2
##   food             cooks_distance
##   <chr>                     <dbl>
## 1 breakfast_tofu           0.0445
## 2 fish_tacos               0.0429
## 3 chicken_burger           0.0388
## 4 lamb_stew                0.0383
## 5 garlic_spaghetti         0.0360
## 6 cheeseburger             0.0297
# plot fitted values vs. residuals (homo-/heteroskedasticity check)
# with an added smoothed trend line
plot(h4_food_meat_model, type = c('p', 'smooth'))

Next, I get a p-value and plot the interaction. Both the main effect and interaction are significant. The effect behaves in the predicted way: the more meat participants ate per week, the more they preferred meat-based foods over plant-based foods.

# get p-values via afex::mixed: type III tests with Satterthwaite
# degrees of freedom (method = "S")
h4_food_meat_model_p <- 
  mixed(
    rating1 ~ 
      food_type*meat_per_week_s +
      (1 + food_type | pp) +
      (1 | food),
    desire,
    control = lmerControl(
      optCtrl=list(maxfun=1e9)
    ),
    type = 3, 
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_food_meat_model_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ food_type * meat_per_week_s + (1 + food_type | pp) + 
## Model:     (1 | food)
## Data: desire
##                           num Df  den Df       F    Pr(>F)    
## food_type                      1  48.149 14.8759 0.0003407 ***
## meat_per_week_s                1 166.992  0.2394 0.6252816    
## food_type:meat_per_week_s      1 166.993 38.4992 4.172e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# effect size: R2m = marginal (fixed effects only), R2c = conditional
# (fixed + random effects) pseudo-R^2
r.squaredGLMM(h4_food_meat_model)
##            R2m       R2c
## [1,] 0.0426724 0.2574563
# simple slopes: the estimated slope of meat_per_week_s within each
# food_type, plus the contrast between those slopes
emtrends(
  h4_food_meat_model,
  pairwise ~ food_type,
  var = "meat_per_week_s"
)
## $emtrends
##  food_type   meat_per_week_s.trend    SE  df asymp.LCL asymp.UCL
##  meat_based                   3.25 0.997 Inf      1.30      5.21
##  plant_based                 -4.13 1.144 Inf     -6.37     -1.88
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate   SE  df z.ratio p.value
##  meat_based - plant_based     7.38 1.19 Inf 6.205   <.0001 
## 
## Degrees-of-freedom method: asymptotic
# plot the interaction: model-implied ratings per food type across
# standardized meat-eating frequency, with confidence bands
interaction_fit <- effects::effect(
  "food_type:meat_per_week_s",
  h4_food_meat_model
)
plot(interaction_fit, ci.style = "bands", multiline = TRUE)

4.3.2 Exploratory

Excluding one potential outlier doesn’t change the results.

# robustness check: refit without the one influential participant
# identified via Cook's distance above
h4_food_meat_model_no_outliers_p <- 
  mixed(
    rating1 ~ 
      food_type*meat_per_week_s +
      (1 + food_type | pp) +
      (1 | food),
    desire %>% 
      filter(pp != "pp_87"),
    control = lmerControl(
      optCtrl=list(maxfun=1e9)
    ),
    type = 3, 
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_food_meat_model_no_outliers_p) 
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ food_type * meat_per_week_s + (1 + food_type | pp) + 
## Model:     (1 | food)
## Data: %>%
## Data: desire
## Data: filter(pp != "pp_87")
##                           num Df  den Df       F    Pr(>F)    
## food_type                      1  47.026 15.8894 0.0002326 ***
## meat_per_week_s                1 166.001  0.3777 0.5396939    
## food_type:meat_per_week_s      1 166.001 33.8970 2.929e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Excluding the two alleged vegetarians (one of whom is actually the outlier above) doesn’t change the results either.

# second robustness check: refit without the two participants suspected
# to be vegetarians (pp_87 is also the influential case above)
h4_food_meat_model_no_vegetarians_p <- 
  mixed(
    rating1 ~ 
      food_type*meat_per_week_s +
      (1 + food_type | pp) +
      (1 | food),
    desire %>% 
      filter(!pp %in% c("pp_77", "pp_87")),
    control = lmerControl(
      optCtrl=list(maxfun=1e9)
    ),
    type = 3, 
    method = "S"
  )
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
anova(h4_food_meat_model_no_vegetarians_p) 
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ food_type * meat_per_week_s + (1 + food_type | pp) + 
## Model:     (1 | food)
## Data: %>%
## Data: desire
## Data: filter(!pp %in% c("pp_77", "pp_87"))
##                           num Df  den Df       F    Pr(>F)    
## food_type                      1  47.163 16.1672 0.0002079 ***
## meat_per_week_s                1 165.001  0.3178 0.5736798    
## food_type:meat_per_week_s      1 165.003 31.6440 7.769e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.4 Interaction effect of label type and food type

4.4.1 Confirmatory

For the food grouping, each food type could only be plant-based or meat-based, meaning we cannot include a random slope of food_type. That also means we cannot model the interaction as a random slope nested in food. We counterbalanced which food was assigned what label type across participants. Therefore, we will only model label_type as random slope for the food grouping.

In contrast, pp contains each combination of food_type and label_type because of counterbalancing. That means we can include the interaction as a random effect nested in pp.

The model yields a singularity warning again, but at an acceptable level of tolerance.

# set contrasts: sum-to-zero coding so main effects are interpretable
# in the presence of the interaction
options(contrasts = c("contr.sum", "contr.poly"))

# fit the interaction model: the full food_type x label_type interaction
# varies by participant (counterbalanced design), but food only gets a
# label_type slope because each food is either plant- or meat-based
desire_model_interaction <- lme4::lmer(
  rating1 ~
    food_type*label_type + 
    (1 + food_type*label_type | pp) +
    (1 + label_type | food),
  desire
)

# inspect estimates and convergence messages
summary(desire_model_interaction)
## Linear mixed model fit by REML ['lmerMod']
## Formula: rating1 ~ food_type * label_type + (1 + food_type * label_type |  
##     pp) + (1 + label_type | food)
##    Data: desire
## 
## REML criterion at convergence: 65111.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06247 -0.72465  0.09188  0.71941  3.01044 
## 
## Random effects:
##  Groups   Name                   Variance Std.Dev. Corr                   
##  pp       (Intercept)            115.414  10.743                          
##           food_type1              53.897   7.341   -0.18                  
##           label_type1             25.479   5.048    0.09  0.20            
##           label_type2             23.430   4.840   -0.02 -0.25 -0.16      
##           label_type3              7.341   2.709   -0.25 -0.22 -0.90 -0.12
##           food_type1:label_type1   8.289   2.879    0.18  0.13 -0.23 -0.52
##           food_type1:label_type2   7.208   2.685   -0.09  0.11  0.00  0.60
##           food_type1:label_type3   5.706   2.389    0.05 -0.05  0.09 -0.11
##  food     (Intercept)             68.321   8.266                          
##           label_type1              2.210   1.487    0.38                  
##           label_type2              2.785   1.669   -0.04  0.91            
##           label_type3              1.657   1.287    0.10 -0.36 -0.43      
##  Residual                        742.388  27.247                          
##                   
##                   
##                   
##                   
##                   
##                   
##   0.16            
##  -0.01 -0.88      
##   0.21 -0.54  0.60
##                   
##                   
##                   
##                   
##                   
## Number of obs: 6800, groups:  pp, 170; food, 40
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)             52.3634     1.5799  33.143
## food_type1               5.5830     1.4609   3.822
## label_type1              0.1249     0.7299   0.171
## label_type2             -1.7198     0.7315  -2.351
## label_type3              0.1257     0.6421   0.196
## food_type1:label_type1  -0.3359     0.6570  -0.511
## food_type1:label_type2  -0.2648     0.6631  -0.399
## food_type1:label_type3   0.3442     0.6345   0.542
## 
## Correlation of Fixed Effects:
##             (Intr) fd_ty1 lbl_t1 lbl_t2 lbl_t3 f_1:_1 f_1:_2
## food_type1  -0.037                                          
## label_type1  0.126  0.041                                   
## label_type2 -0.017 -0.049 -0.141                            
## label_type3 -0.017 -0.028 -0.425 -0.301                     
## fd_typ1:l_1  0.032  0.138 -0.040 -0.088  0.018              
## fd_typ1:l_2 -0.015 -0.001  0.000  0.095 -0.001 -0.213       
## fd_typ1:l_3  0.007  0.023  0.013 -0.016  0.020 -0.356 -0.261
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# check smallest parameter: theta values at or near zero indicate which
# variance component drives the singular-fit warning
getME(desire_model_interaction,"theta")
##                                   pp.(Intercept) 
##                                     3.942876e-01 
##                        pp.food_type1.(Intercept) 
##                                    -4.908749e-02 
##                       pp.label_type1.(Intercept) 
##                                     1.703543e-02 
##                       pp.label_type2.(Intercept) 
##                                    -3.584173e-03 
##                       pp.label_type3.(Intercept) 
##                                    -2.500236e-02 
##            pp.food_type1:label_type1.(Intercept) 
##                                     1.921486e-02 
##            pp.food_type1:label_type2.(Intercept) 
##                                    -9.092586e-03 
##            pp.food_type1:label_type3.(Intercept) 
##                                     4.001495e-03 
##                                    pp.food_type1 
##                                     2.649338e-01 
##                        pp.label_type1.food_type1 
##                                     4.130789e-02 
##                        pp.label_type2.food_type1 
##                                    -4.558926e-02 
##                        pp.label_type3.food_type1 
##                                    -2.732515e-02 
##             pp.food_type1:label_type1.food_type1 
##                                     1.786990e-02 
##             pp.food_type1:label_type2.food_type1 
##                                     9.399184e-03 
##             pp.food_type1:label_type3.food_type1 
##                                    -3.419942e-03 
##                                   pp.label_type1 
##                                     1.797878e-01 
##                       pp.label_type2.label_type1 
##                                    -1.804606e-02 
##                       pp.label_type3.label_type1 
##                                    -8.367411e-02 
##            pp.food_type1:label_type1.label_type1 
##                                    -3.043482e-02 
##            pp.food_type1:label_type2.label_type1 
##                                    -1.150626e-03 
##            pp.food_type1:label_type3.label_type1 
##                                     8.137286e-03 
##                                   pp.label_type2 
##                                     1.707136e-01 
##                       pp.label_type3.label_type2 
##                                    -2.861768e-02 
##            pp.food_type1:label_type1.label_type2 
##                                    -5.475913e-02 
##            pp.food_type1:label_type2.label_type2 
##                                     6.405823e-02 
##            pp.food_type1:label_type3.label_type2 
##                                    -9.860414e-03 
##                                   pp.label_type3 
##                                     2.638579e-02 
##            pp.food_type1:label_type1.label_type3 
##                                    -5.498343e-02 
##            pp.food_type1:label_type2.label_type3 
##                                     6.433824e-02 
##            pp.food_type1:label_type3.label_type3 
##                                     8.592097e-02 
##                        pp.food_type1:label_type1 
##                                     5.939813e-02 
## pp.food_type1:label_type2.food_type1:label_type1 
##                                    -3.597039e-02 
## pp.food_type1:label_type3.food_type1:label_type1 
##                                    -1.062679e-02 
##                        pp.food_type1:label_type2 
##                                     1.218370e-05 
## pp.food_type1:label_type3.food_type1:label_type2 
##                                     2.646709e-04 
##                        pp.food_type1:label_type3 
##                                     1.281739e-04 
##                                 food.(Intercept) 
##                                     3.033614e-01 
##                     food.label_type1.(Intercept) 
##                                     2.065374e-02 
##                     food.label_type2.(Intercept) 
##                                    -2.404173e-03 
##                     food.label_type3.(Intercept) 
##                                     4.623439e-03 
##                                 food.label_type1 
##                                     5.050447e-02 
##                     food.label_type2.label_type1 
##                                     6.120394e-02 
##                     food.label_type3.label_type1 
##                                    -2.030343e-02 
##                                 food.label_type2 
##                                     1.165671e-05 
##                     food.label_type3.label_type2 
##                                     1.205857e-02 
##                                 food.label_type3 
##                                     4.066253e-02
# formal singularity check: the fit is not singular at this tolerance level
isSingular(desire_model_interaction, tol = 1e-5) 
## [1] FALSE

Again, the model diagnostics look okay. There are no clear outlying participants, maybe pp_15 and pp_21. For food, cheeseburger stands out as a potential outlier. I’ll run the model without these cases as a robustness check in the exploratory section.

# inspect the distribution of the standardized (scaled) residuals
densityplot(resid(desire_model_interaction, scaled = TRUE)) 

# q-q plot of the scaled residuals
# (the printed indices below are the two most extreme residuals flagged by qqPlot)
car::qqPlot(resid(desire_model_interaction, scaled = TRUE))

## [1] 6799 2151
# proportion of residuals in the extremes of the distribution
lmer_residuals(desire_model_interaction)
## # A tibble: 3 x 4
##   standardized_cutoff proportion_cutoff proportion below_cutoff
##                 <dbl>             <dbl>      <dbl> <lgl>       
## 1                 2              0.05     0.0275   TRUE        
## 2                 2.5            0.01     0.00485  TRUE        
## 3                 3              0.0001   0.000294 FALSE
# obtain leave-one-out influence measures, once per participant and once per food
desire_model_interaction_outliers_pp <- influence.ME::influence(desire_model_interaction, c("pp"))
desire_model_interaction_outliers_food <- influence.ME::influence(desire_model_interaction, c("food"))

# plot formal outliers for pp: Cook's distance per participant
plot(desire_model_interaction_outliers_pp, which = 'cook')

# dfbetas per fixed-effect coefficient (one plot per coefficient; 8 coefficients)
plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[1]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[2]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[3]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[4]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[5]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[6]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[7]

plot(desire_model_interaction_outliers_pp, which = 'dfbetas')[8]

# which participants have the highest Cook's distance?
arrange_cook(desire_model_interaction_outliers_pp, "pp")
## # A tibble: 6 x 2
##   pp     cooks_distance
##   <chr>           <dbl>
## 1 pp_15          0.0165
## 2 pp_21          0.0155
## 3 pp_178         0.0128
## 4 pp_125         0.0127
## 5 pp_89          0.0124
## 6 pp_37          0.0113
# plot formal outliers for food: Cook's distance per food item
plot(desire_model_interaction_outliers_food, which = 'cook')

# dfbetas per fixed-effect coefficient (one plot per coefficient; 8 coefficients)
plot(desire_model_interaction_outliers_food, which = 'dfbetas')[1]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[2]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[3]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[4]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[5]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[6]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[7]

plot(desire_model_interaction_outliers_food, which = 'dfbetas')[8]

# which foods are so far out on Cook's distance?
arrange_cook(desire_model_interaction_outliers_food, "food")
## # A tibble: 6 x 2
##   food              cooks_distance
##   <chr>                      <dbl>
## 1 cheeseburger              0.0738
## 2 fish_tacos                0.0537
## 3 cauliflower_tacos         0.0457
## 4 lamb_stew                 0.0432
## 5 breakfast_tofu            0.0428
## 6 tuna_sandwich             0.0405
# plot the fitted vs. the residuals (to check for homo/heteroskedasticity) with added smoothed line.
plot(desire_model_interaction, type = c('p', 'smooth'))

Next, I obtain p-values. We get the same warning as above. As expected from the plots, only the main effect of food_type is significant.

# get p-values via afex::mixed: Type 3 tests with Satterthwaite approximation
desire_model_interaction_p <- mixed(
  rating1 ~
    label_type*food_type +
    (1 + label_type*food_type | pp) +
    (1 + label_type | food),
  desire,
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# summary of the fitted model (fixed and random effects)
summary(desire_model_interaction_p)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: rating1 ~ label_type * food_type + (1 + label_type * food_type |  
##     pp) + (1 + label_type | food)
##    Data: data
## 
## REML criterion at convergence: 65112.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06551 -0.72444  0.09536  0.71936  3.01667 
## 
## Random effects:
##  Groups   Name                   Variance Std.Dev. Corr                   
##  pp       (Intercept)            115.417  10.743                          
##           label_type1             25.070   5.007    0.09                  
##           label_type2             22.520   4.746   -0.02 -0.14            
##           label_type3              7.088   2.662   -0.25 -0.97 -0.05      
##           food_type1              53.848   7.338   -0.18  0.21 -0.28 -0.12
##           label_type1:food_type1   8.082   2.843    0.18 -0.22 -0.57  0.29
##           label_type2:food_type1   7.098   2.664   -0.09 -0.01  0.65 -0.10
##           label_type3:food_type1   5.516   2.349    0.05  0.05 -0.03 -0.05
##  food     (Intercept)             68.291   8.264                          
##           label_type1              2.193   1.481    0.38                  
##           label_type2              2.772   1.665   -0.04  0.91            
##           label_type3              1.556   1.247    0.10 -0.37 -0.44      
##  Residual                        743.090  27.260                          
##                   
##                   
##                   
##                   
##                   
##                   
##   0.14            
##   0.11 -0.88      
##  -0.05 -0.57  0.61
##                   
##                   
##                   
##                   
##                   
## Number of obs: 6800, groups:  pp, 170; food, 40
## 
## Fixed effects:
##                        Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)             52.3634     1.5797 68.5932  33.147  < 2e-16 ***
## label_type1              0.1246     0.7282 84.4632   0.171 0.864550    
## label_type2             -1.7196     0.7278 70.1369  -2.363 0.020925 *  
## label_type3              0.1253     0.6392 40.0173   0.196 0.845602    
## food_type1               5.5830     1.4606 51.6037   3.822 0.000357 ***
## label_type1:food_type1  -0.3359     0.6560 67.3244  -0.512 0.610264    
## label_type2:food_type1  -0.2647     0.6626 60.5483  -0.399 0.690931    
## label_type3:food_type1   0.3442     0.6319 37.6309   0.545 0.589130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lbl_t1 lbl_t2 lbl_t3 fd_ty1 l_1:_1 l_2:_1
## label_type1  0.128                                          
## label_type2 -0.018 -0.138                                   
## label_type3 -0.017 -0.434 -0.292                            
## food_type1  -0.037  0.044 -0.055 -0.015                     
## lbl_typ1:_1  0.032 -0.038 -0.095  0.031  0.140              
## lbl_typ2:_1 -0.015 -0.001  0.100 -0.010 -0.002 -0.214       
## lbl_typ3:_1  0.007  0.008 -0.004 -0.005  0.022 -0.358 -0.262
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# ANOVA table with Satterthwaite p-values for the three model terms
anova(desire_model_interaction_p)
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: desire
##                      num Df den Df       F    Pr(>F)    
## label_type                3 55.166  2.0265 0.1207700    
## food_type                 1 51.604 14.6113 0.0003572 ***
## label_type:food_type      3 48.701  0.1923 0.9011246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# approximate effect size: marginal (fixed effects only, R2m) and
# conditional (fixed + random effects, R2c) R-squared
r.squaredGLMM(desire_model_interaction) 
##             R2m      R2c
## [1,] 0.03121892 0.288054

4.4.2 Exploratory

Just to explore, I also look at the pairwise comparisons between food_labels. None of the differences are large enough to reach significance.

# exploratory pairwise comparisons between the four label types,
# averaged over food_type (Tukey-adjusted, see output below)
desire_model_posthocs <- 
  emmeans(
    desire_model_interaction,
    pairwise ~ label_type
  )

# print estimated marginal means and contrasts
desire_model_posthocs 
## $emmeans
##  label_type      emmean   SE  df asymp.LCL asymp.UCL
##  contextual        52.5 1.82 Inf      48.9      56.1
##  health_positive   50.6 1.73 Inf      47.3      54.0
##  neutral           52.5 1.70 Inf      49.2      55.8
##  sensory           53.8 1.71 Inf      50.5      57.2
## 
## Results are averaged over the levels of: food_type 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                      estimate   SE  df z.ratio p.value
##  contextual - health_positive  1.844714 1.10 Inf  1.671  0.3392 
##  contextual - neutral         -0.000817 1.16 Inf -0.001  1.0000 
##  contextual - sensory         -1.344314 1.30 Inf -1.032  0.7309 
##  health_positive - neutral    -1.845531 1.11 Inf -1.664  0.3428 
##  health_positive - sensory    -3.189028 1.35 Inf -2.364  0.0842 
##  neutral - sensory            -1.343497 1.10 Inf -1.224  0.6115 
## 
## Results are averaged over the levels of: food_type 
## Degrees-of-freedom method: asymptotic 
## P value adjustment: tukey method for comparing a family of 4 estimates

The conclusions don’t change when excluding potential outliers.

# robustness check: re-fit the interaction model excluding the potential
# outliers identified above (participants pp_15 and pp_21; food cheeseburger)
desire_model_interaction_no_outliers_p <- mixed(
  rating1 ~
    label_type*food_type +
    # fixed typo: "pp15" lacked the underscore, so pp_15 was never excluded
    (1 + label_type*food_type | pp) +
    (1 + label_type | food),
  desire %>% 
    filter(!pp %in% c("pp_15", "pp_21")) %>% 
    filter(food != "cheeseburger"),
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# get p-values
anova(desire_model_interaction_no_outliers_p) 
## NOTE(review): the table below was generated with the misspelled "pp15"
## filter (pp_15 still included); re-knit to refresh — values may shift slightly
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: desire %>% filter(!pp %in% c("pp15", "pp_21"))
## Data: filter(food != "cheeseburger")
##                      num Df den Df       F    Pr(>F)    
## label_type                3 56.557  2.3040 0.0866413 .  
## food_type                 1 50.305 13.2710 0.0006373 ***
## label_type:food_type      3 48.764  0.4343 0.7293861    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Neither do conclusions change when excluding the two alleged vegetarians.

# robustness check: re-fit the interaction model excluding the two alleged
# vegetarians
desire_model_interaction_no_vegetarians_p <- mixed(
  rating1 ~
    label_type*food_type +
    (1 + label_type*food_type | pp) +
    (1 + label_type | food),
  desire %>% 
    # fixed typo: "pp77" lacked the underscore used by every participant id
    # (pp_NN) elsewhere in the file — TODO confirm pp_77 is the intended id
    filter(!pp %in% c("pp_77", "pp_87")),
  type = 3,
  method = "S"
)
## Fitting one lmer() model. [DONE]
## Calculating p-values. [DONE]
# get p-values
anova(desire_model_interaction_no_vegetarians_p)
## NOTE(review): the table below was generated with the misspelled "pp77"
## filter (pp_77 still included); re-knit to refresh — values may shift slightly
## Mixed Model Anova Table (Type 3 tests, S-method)
## 
## Model: rating1 ~ label_type * food_type + (1 + label_type * food_type | 
## Model:     pp) + (1 + label_type | food)
## Data: %>%
## Data: desire
## Data: filter(!pp %in% c("pp77", "pp_87"))
##                      num Df den Df       F   Pr(>F)    
## label_type                3 59.143  2.2310 0.093967 .  
## food_type                 1 49.916 15.9357 0.000215 ***
## label_type:food_type      3 52.499  0.1579 0.924134    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.5 Intention to reduce eating meat and desire

Last, we want to know the correlation between the intention to reduce eating meat and desire for plant-based foods. I aggregate by participant first and then compute the correlation per food type.

# aggregate desire ratings per participant and food type, then attach each
# participant's intention to change their diet (one value per participant,
# taken from their first row)
desire_aggregated <- 
  desire %>% 
  group_by(pp, food_type) %>% 
  summarize(
    mean_rating = mean(rating1) # get aggregated desire ratings
  ) %>% 
  left_join( # add the intention to change diet
    .,
    desire %>% select(pp, change_diet) %>% group_by(pp) %>% slice(1) %>% ungroup(),
    by = "pp"
  )

# plot mean desire against intention to change diet, one regression line per food type
ggplot(
  desire_aggregated,
  aes(
    x = change_diet,
    y = mean_rating,
    color = food_type
  )
) +
  geom_point() +
  geom_smooth(method = "lm")

# correlation between aggregated desire and intention, per food type;
# run cor.test() once per group (it was previously computed twice, once for
# the estimate and once for the p-value) and extract both quantities from it
desire_aggregated %>% 
  group_by(food_type) %>% 
  summarize(
    test = list(cor.test(mean_rating, change_diet)),
    r = map_dbl(test, "estimate"),
    p = map_dbl(test, "p.value")
  ) %>% 
  select(-test)
## # A tibble: 2 x 3
##   food_type         r          p
##   <fct>         <dbl>      <dbl>
## 1 meat_based  -0.0438 0.571     
## 2 plant_based  0.342  0.00000493

5. Write final files

Last, I write the final analysis files. I also save the models from which I will extract parameters for plotting (see plots.Rmd).

# save models for plotting (see plots.Rmd)
# NOTE(review): the original call listed h1_simulations_model_no_slope twice;
# the duplicate is removed here. If a different model was intended in that
# slot, add it back explicitly.
save(
  h1_simulations_model_no_slope,
  h4_food_meat_model,
  desire_model_interaction,
  file = here("plots", "models", "models_study2.RData")
)

# final file
write_csv(working_file, here("data", "study2", "analysis_file.csv"))

# attractiveness (desire) ratings only
write_csv(desire, here("data", "study2", "desire.csv"))

# simulations only
write_csv(simulations, here("data", "study2", "simulations.csv"))